diff options
Diffstat (limited to 'contrib/pageinspect/btreefuncs.c')
-rw-r--r-- | contrib/pageinspect/btreefuncs.c | 298 |
1 files changed, 225 insertions, 73 deletions
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c index 9375d55e149..e4e5dc3c81e 100644 --- a/contrib/pageinspect/btreefuncs.c +++ b/contrib/pageinspect/btreefuncs.c @@ -46,17 +46,13 @@ PG_FUNCTION_INFO_V1(bt_page_items); PG_FUNCTION_INFO_V1(bt_page_items_bytea); PG_FUNCTION_INFO_V1(bt_page_stats_1_9); PG_FUNCTION_INFO_V1(bt_page_stats); +PG_FUNCTION_INFO_V1(bt_multi_page_stats); #define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX) #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) #define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X)) #define ItemPointerGetDatum(X) PointerGetDatum(X) -/* note: BlockNumber is unsigned, hence can't be negative */ -#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \ - if ( RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno) ) \ - elog(ERROR, "block number out of range"); } - /* ------------------------------------------------ * structure for single btree page statistics * ------------------------------------------------ @@ -80,6 +76,29 @@ typedef struct BTPageStat BTCycleId btpo_cycleid; } BTPageStat; +/* + * cross-call data structure for SRF for page stats + */ +typedef struct ua_page_stats +{ + Oid relid; + int64 blkno; + int64 blk_count; + bool allpages; +} ua_page_stats; + +/* + * cross-call data structure for SRF for page items + */ +typedef struct ua_page_items +{ + Page page; + OffsetNumber offset; + bool leafpage; + bool rightmost; + TupleDesc tupd; +} ua_page_items; + /* ------------------------------------------------- * GetBTPageStatistics() @@ -177,34 +196,38 @@ GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat) } /* ----------------------------------------------- - * bt_page_stats() + * check_relation_block_range() * - * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1); + * Verify that a block number (given as int64) is valid for the relation. * ----------------------------------------------- */ -static Datum -bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) +static void +check_relation_block_range(Relation rel, int64 blkno) { - text *relname = PG_GETARG_TEXT_PP(0); - int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1)); - Buffer buffer; - Relation rel; - RangeVar *relrv; - Datum result; - HeapTuple tuple; - TupleDesc tupleDesc; - int j; - char *values[11]; - BTPageStat stat; - - if (!superuser()) + /* Ensure we can cast to BlockNumber */ + if (blkno < 0 || blkno > MaxBlockNumber) ereport(ERROR, - (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), - errmsg("must be superuser to use pageinspect functions"))); + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number %lld", + (long long) blkno))); - relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); - rel = relation_openrv(relrv, AccessShareLock); + if ((BlockNumber) (blkno) >= RelationGetNumberOfBlocks(rel)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("block number %lld is out of range", + (long long) blkno))); +} +/* ----------------------------------------------- + * bt_index_block_validate() + * + * Validate index type is btree and block number + * is valid (and not the metapage). + * ----------------------------------------------- + */ +static void +bt_index_block_validate(Relation rel, int64 blkno) +{ if (!IS_INDEX(rel) || !IS_BTREE(rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -221,17 +244,45 @@ bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); - if (blkno < 0 || blkno > MaxBlockNumber) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid block number"))); - if (blkno == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("block 0 is a meta page"))); - CHECK_RELATION_BLOCK_RANGE(rel, blkno); + check_relation_block_range(rel, blkno); +} + +/* ----------------------------------------------- + * bt_page_stats() + * + * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1); + * Arguments are index relation name and block number + * ----------------------------------------------- + */ +static Datum +bt_page_stats_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) +{ + text *relname = PG_GETARG_TEXT_PP(0); + int64 blkno = (ext_version == PAGEINSPECT_V1_8 ? PG_GETARG_UINT32(1) : PG_GETARG_INT64(1)); + Buffer buffer; + Relation rel; + RangeVar *relrv; + Datum result; + HeapTuple tuple; + TupleDesc tupleDesc; + int j; + char *values[11]; + BTPageStat stat; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pageinspect functions"))); + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + bt_index_block_validate(rel, blkno); buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); @@ -284,17 +335,144 @@ bt_page_stats(PG_FUNCTION_ARGS) } -/* - * cross-call data structure for SRF +/* ----------------------------------------------- + * bt_multi_page_stats() + * + * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1, 2); + * Arguments are index relation name, first block number, number of blocks + * (but number of blocks can be negative to mean "read all the rest") + * ----------------------------------------------- */ -struct user_args +Datum +bt_multi_page_stats(PG_FUNCTION_ARGS) { - Page page; - OffsetNumber offset; - bool leafpage; - bool rightmost; - TupleDesc tupd; -}; + Relation rel; + ua_page_stats *uargs; + FuncCallContext *fctx; + MemoryContext mctx; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pageinspect functions"))); + + if (SRF_IS_FIRSTCALL()) + { + text *relname = PG_GETARG_TEXT_PP(0); + int64 blkno = PG_GETARG_INT64(1); + int64 blk_count = PG_GETARG_INT64(2); + RangeVar *relrv; + + fctx = SRF_FIRSTCALL_INIT(); + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + /* Check that rel is a valid btree index and 1st block number is OK */ + bt_index_block_validate(rel, blkno); + + /* + * Check if upper bound of the specified range is valid. If only one + * page is requested, skip as we've already validated the page. (Also, + * it's important to skip this if blk_count is negative.) + */ + if (blk_count > 1) + check_relation_block_range(rel, blkno + blk_count - 1); + + /* Save arguments for reuse */ + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + uargs = palloc(sizeof(ua_page_stats)); + + uargs->relid = RelationGetRelid(rel); + uargs->blkno = blkno; + uargs->blk_count = blk_count; + uargs->allpages = (blk_count < 0); + + fctx->user_fctx = uargs; + + MemoryContextSwitchTo(mctx); + + /* + * To avoid possibly leaking a relcache reference if the SRF isn't run + * to completion, we close and re-open the index rel each time + * through, using the index's OID for re-opens to ensure we get the + * same rel. Keep the AccessShareLock though, to ensure it doesn't go + * away underneath us. + */ + relation_close(rel, NoLock); + } + + fctx = SRF_PERCALL_SETUP(); + uargs = fctx->user_fctx; + + /* We should have lock already */ + rel = relation_open(uargs->relid, NoLock); + + /* In all-pages mode, recheck the index length each time */ + if (uargs->allpages) + uargs->blk_count = RelationGetNumberOfBlocks(rel) - uargs->blkno; + + if (uargs->blk_count > 0) + { + /* We need to fetch next block statistics */ + Buffer buffer; + Datum result; + HeapTuple tuple; + int j; + char *values[11]; + BTPageStat stat; + TupleDesc tupleDesc; + + buffer = ReadBuffer(rel, uargs->blkno); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + /* keep compiler quiet */ + stat.btpo_prev = stat.btpo_next = InvalidBlockNumber; + stat.btpo_flags = stat.free_size = stat.avg_item_size = 0; + + GetBTPageStatistics(uargs->blkno, buffer, &stat); + + UnlockReleaseBuffer(buffer); + relation_close(rel, NoLock); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + j = 0; + values[j++] = psprintf("%u", stat.blkno); + values[j++] = psprintf("%c", stat.type); + values[j++] = psprintf("%u", stat.live_items); + values[j++] = psprintf("%u", stat.dead_items); + values[j++] = psprintf("%u", stat.avg_item_size); + values[j++] = psprintf("%u", stat.page_size); + values[j++] = psprintf("%u", stat.free_size); + values[j++] = psprintf("%u", stat.btpo_prev); + values[j++] = psprintf("%u", stat.btpo_next); + values[j++] = psprintf("%u", stat.btpo_level); + values[j++] = psprintf("%d", stat.btpo_flags); + + /* Construct tuple to be returned */ + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = HeapTupleGetDatum(tuple); + + /* + * Move to the next block number and decrement the number of blocks + * still to be fetched + */ + uargs->blkno++; + uargs->blk_count--; + + SRF_RETURN_NEXT(fctx, result); + } + + /* Done, so finally we can release the index lock */ + relation_close(rel, AccessShareLock); + SRF_RETURN_DONE(fctx); +} /*------------------------------------------------------- * bt_page_print_tuples() @@ -303,7 +481,7 @@ struct user_args * ------------------------------------------------------ */ static Datum -bt_page_print_tuples(struct user_args *uargs) +bt_page_print_tuples(ua_page_items *uargs) { Page page = uargs->page; OffsetNumber offset = uargs->offset; @@ -453,7 +631,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) Datum result; FuncCallContext *fctx; MemoryContext mctx; - struct user_args *uargs; + ua_page_items *uargs; if (!superuser()) ereport(ERROR, @@ -473,33 +651,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); - if (!IS_INDEX(rel) || !IS_BTREE(rel)) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("\"%s\" is not a %s index", - RelationGetRelationName(rel), "btree"))); - - /* - * Reject attempts to read non-local temporary relations; we would be - * likely to get wrong data since we have no visibility into the - * owning session's local buffers. - */ - if (RELATION_IS_OTHER_TEMP(rel)) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("cannot access temporary tables of other sessions"))); - - if (blkno < 0 || blkno > MaxBlockNumber) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid block number"))); - - if (blkno == 0) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("block 0 is a meta page"))); - - CHECK_RELATION_BLOCK_RANGE(rel, blkno); + bt_index_block_validate(rel, blkno); buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); @@ -511,7 +663,7 @@ bt_page_items_internal(PG_FUNCTION_ARGS, enum pageinspect_version ext_version) */ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); - uargs = palloc(sizeof(struct user_args)); + uargs = palloc(sizeof(ua_page_items)); uargs->page = palloc(BLCKSZ); memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ); @@ -587,7 +739,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS) bytea *raw_page = PG_GETARG_BYTEA_P(0); Datum result; FuncCallContext *fctx; - struct user_args *uargs; + ua_page_items *uargs; if (!superuser()) ereport(ERROR, @@ -603,7 +755,7 @@ bt_page_items_bytea(PG_FUNCTION_ARGS) fctx = SRF_FIRSTCALL_INIT(); mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); - uargs = palloc(sizeof(struct user_args)); + uargs = palloc(sizeof(ua_page_items)); uargs->page = get_page_from_raw(raw_page); |