From 0dca5d68d7bebf2c1036fd84875533afef6df992 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 2 Apr 2025 14:05:50 -0400 Subject: Change SQL-language functions to use the plan cache. In the historical implementation of SQL functions (if they don't get inlined), we built plans for all the contained queries at first call within an outer query, and then re-used those plans for the duration of the outer query, and then forgot everything. This was not ideal, not least because the plans could not be customized to specific values of the function's parameters. Our plancache infrastructure seems mature enough to be used here. That will solve both the problem with not being able to build custom plans and the problem with not being able to share work across successive outer queries. Aside from those performance concerns, this change fixes a longstanding bugaboo with SQL functions: you could not write DDL that would affect later statements in the same function. That's mostly still true with new-style SQL functions, since the results of parse analysis are baked into the stored query trees (and protected by dependency records). But for old-style SQL functions, it will now work much as it does with PL/pgSQL functions, because we delay parse analysis and planning of each query until we're ready to run it. Some edge cases that require replanning are now handled better too; see for example the new rowsecurity test, where we now detect an RLS context change that was previously missed. One other edge-case change that might be worthy of a release note is that we now insist that a SQL function's result be generated by the physically-last query within it. Previously, if the last original query was deleted by a DO INSTEAD NOTHING rule, we'd be willing to take the result from the preceding query instead. This behavior was undocumented except in source-code comments, and it seems hard to believe that anyone's relying on it. Along the way to this feature, we needed a few infrastructure changes: * The plancache can now take either a raw parse tree or an analyzed-but-not-rewritten Query as the starting point for a CachedPlanSource. If given a Query, it is caller's responsibility that nothing will happen to invalidate that form of the query. We use this for new-style SQL functions, where what's in pg_proc is serialized Query(s) and we trust the dependency mechanism to disallow DDL that would break those. * The plancache now offers a way to invoke a post-rewrite callback to examine/modify the rewritten parse tree when it is rebuilding the parse trees after a cache invalidation. We need this because SQL functions sometimes adjust the parse tree to make its output exactly match the declared result type; if the plan gets rebuilt, that has to be re-done. * There is a new backend module utils/cache/funccache.c that abstracts the idea of caching data about a specific function usage (a particular function and set of input data types). The code in it is moved almost verbatim from PL/pgSQL, which has done that for a long time. We use that logic now for SQL-language functions too, and maybe other PLs will have use for it in the future. Author: Alexander Pyhalov Co-authored-by: Tom Lane Reviewed-by: Pavel Stehule Discussion: https://postgr.es/m/8216639.NyiUUSuA9g@aivenlaptop --- src/backend/utils/cache/plancache.c | 191 +++++++++++++++++++++++++++++------- 1 file changed, 153 insertions(+), 38 deletions(-) (limited to 'src/backend/utils/cache/plancache.c') diff --git a/src/backend/utils/cache/plancache.c b/src/backend/utils/cache/plancache.c index 6c2979d5c82..3b681647060 100644 --- a/src/backend/utils/cache/plancache.c +++ b/src/backend/utils/cache/plancache.c @@ -14,7 +14,7 @@ * Cache invalidation is driven off sinval events. Any CachedPlanSource * that matches the event is marked invalid, as is its generic CachedPlan * if it has one. When (and if) the next demand for a cached plan occurs, - * parse analysis and rewrite is repeated to build a new valid query tree, + * parse analysis and/or rewrite is repeated to build a new valid query tree, * and then planning is performed as normal. We also force re-analysis and * re-planning if the active search_path is different from the previous time * or, if RLS is involved, if the user changes or the RLS environment changes. @@ -63,6 +63,7 @@ #include "nodes/nodeFuncs.h" #include "optimizer/optimizer.h" #include "parser/analyze.h" +#include "rewrite/rewriteHandler.h" #include "storage/lmgr.h" #include "tcop/pquery.h" #include "tcop/utility.h" @@ -74,18 +75,6 @@ #include "utils/syscache.h" -/* - * We must skip "overhead" operations that involve database access when the - * cached plan's subject statement is a transaction control command or one - * that requires a snapshot not to be set yet (such as SET or LOCK). More - * generally, statements that do not require parse analysis/rewrite/plan - * activity never need to be revalidated, so we can treat them all like that. - * For the convenience of postgres.c, treat empty statements that way too. - */ -#define StmtPlanRequiresRevalidation(plansource) \ - ((plansource)->raw_parse_tree != NULL && \ - stmt_requires_parse_analysis((plansource)->raw_parse_tree)) - /* * This is the head of the backend's list of "saved" CachedPlanSources (i.e., * those that are in long-lived storage and are examined for sinval events). @@ -100,6 +89,8 @@ static dlist_head saved_plan_list = DLIST_STATIC_INIT(saved_plan_list); static dlist_head cached_expression_list = DLIST_STATIC_INIT(cached_expression_list); static void ReleaseGenericPlan(CachedPlanSource *plansource); +static bool StmtPlanRequiresRevalidation(CachedPlanSource *plansource); +static bool BuildingPlanRequiresSnapshot(CachedPlanSource *plansource); static List *RevalidateCachedQuery(CachedPlanSource *plansource, QueryEnvironment *queryEnv, bool release_generic); @@ -166,7 +157,7 @@ InitPlanCache(void) } /* - * CreateCachedPlan: initially create a plan cache entry. + * CreateCachedPlan: initially create a plan cache entry for a raw parse tree. * * Creation of a cached plan is divided into two steps, CreateCachedPlan and * CompleteCachedPlan. CreateCachedPlan should be called after running the @@ -220,6 +211,7 @@ CreateCachedPlan(RawStmt *raw_parse_tree, plansource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource)); plansource->magic = CACHEDPLANSOURCE_MAGIC; plansource->raw_parse_tree = copyObject(raw_parse_tree); + plansource->analyzed_parse_tree = NULL; plansource->query_string = pstrdup(query_string); MemoryContextSetIdentifier(source_context, plansource->query_string); plansource->commandTag = commandTag; @@ -227,6 +219,8 @@ CreateCachedPlan(RawStmt *raw_parse_tree, plansource->num_params = 0; plansource->parserSetup = NULL; plansource->parserSetupArg = NULL; + plansource->postRewrite = NULL; + plansource->postRewriteArg = NULL; plansource->cursor_options = 0; plansource->fixed_result = false; plansource->resultDesc = NULL; @@ -255,6 +249,34 @@ CreateCachedPlan(RawStmt *raw_parse_tree, return plansource; } +/* + * CreateCachedPlanForQuery: initially create a plan cache entry for a Query. + * + * This is used in the same way as CreateCachedPlan, except that the source + * query has already been through parse analysis, and the plancache will never + * try to re-do that step. + * + * Currently this is used only for new-style SQL functions, where we have a + * Query from the function's prosqlbody, but no source text. The query_string + * is typically empty, but is required anyway. + */ +CachedPlanSource * +CreateCachedPlanForQuery(Query *analyzed_parse_tree, + const char *query_string, + CommandTag commandTag) +{ + CachedPlanSource *plansource; + MemoryContext oldcxt; + + /* Rather than duplicating CreateCachedPlan, just do this: */ + plansource = CreateCachedPlan(NULL, query_string, commandTag); + oldcxt = MemoryContextSwitchTo(plansource->context); + plansource->analyzed_parse_tree = copyObject(analyzed_parse_tree); + MemoryContextSwitchTo(oldcxt); + + return plansource; +} + /* * CreateOneShotCachedPlan: initially create a one-shot plan cache entry. * @@ -289,12 +311,15 @@ CreateOneShotCachedPlan(RawStmt *raw_parse_tree, plansource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource)); plansource->magic = CACHEDPLANSOURCE_MAGIC; plansource->raw_parse_tree = raw_parse_tree; + plansource->analyzed_parse_tree = NULL; plansource->query_string = query_string; plansource->commandTag = commandTag; plansource->param_types = NULL; plansource->num_params = 0; plansource->parserSetup = NULL; plansource->parserSetupArg = NULL; + plansource->postRewrite = NULL; + plansource->postRewriteArg = NULL; plansource->cursor_options = 0; plansource->fixed_result = false; plansource->resultDesc = NULL; @@ -464,6 +489,29 @@ CompleteCachedPlan(CachedPlanSource *plansource, plansource->is_valid = true; } +/* + * SetPostRewriteHook: set a hook to modify post-rewrite query trees + * + * Some callers have a need to modify the query trees between rewriting and + * planning. In the initial call to CompleteCachedPlan, it's assumed such + * work was already done on the querytree_list. However, if we're forced + * to replan, it will need to be done over. The caller can set this hook + * to provide code to make that happen. + * + * postRewriteArg is just passed verbatim to the hook. As with parserSetupArg, + * it is caller's responsibility that the referenced data remains + * valid for as long as the CachedPlanSource exists. + */ +void +SetPostRewriteHook(CachedPlanSource *plansource, + PostRewriteHook postRewrite, + void *postRewriteArg) +{ + Assert(plansource->magic == CACHEDPLANSOURCE_MAGIC); + plansource->postRewrite = postRewrite; + plansource->postRewriteArg = postRewriteArg; +} + /* * SaveCachedPlan: save a cached plan permanently * @@ -566,6 +614,42 @@ ReleaseGenericPlan(CachedPlanSource *plansource) } } +/* + * We must skip "overhead" operations that involve database access when the + * cached plan's subject statement is a transaction control command or one + * that requires a snapshot not to be set yet (such as SET or LOCK). More + * generally, statements that do not require parse analysis/rewrite/plan + * activity never need to be revalidated, so we can treat them all like that. + * For the convenience of postgres.c, treat empty statements that way too. + */ +static bool +StmtPlanRequiresRevalidation(CachedPlanSource *plansource) +{ + if (plansource->raw_parse_tree != NULL) + return stmt_requires_parse_analysis(plansource->raw_parse_tree); + else if (plansource->analyzed_parse_tree != NULL) + return query_requires_rewrite_plan(plansource->analyzed_parse_tree); + /* empty query never needs revalidation */ + return false; +} + +/* + * Determine if creating a plan for this CachedPlanSource requires a snapshot. + * In fact this function matches StmtPlanRequiresRevalidation(), but we want + * to preserve the distinction between stmt_requires_parse_analysis() and + * analyze_requires_snapshot(). + */ +static bool +BuildingPlanRequiresSnapshot(CachedPlanSource *plansource) +{ + if (plansource->raw_parse_tree != NULL) + return analyze_requires_snapshot(plansource->raw_parse_tree); + else if (plansource->analyzed_parse_tree != NULL) + return query_requires_rewrite_plan(plansource->analyzed_parse_tree); + /* empty query never needs a snapshot */ + return false; +} + /* * RevalidateCachedQuery: ensure validity of analyzed-and-rewritten query tree. * @@ -592,7 +676,6 @@ RevalidateCachedQuery(CachedPlanSource *plansource, bool release_generic) { bool snapshot_set; - RawStmt *rawtree; List *tlist; /* transient query-tree list */ List *qlist; /* permanent query-tree list */ TupleDesc resultDesc; @@ -615,7 +698,10 @@ RevalidateCachedQuery(CachedPlanSource *plansource, /* * If the query is currently valid, we should have a saved search_path --- * check to see if that matches the current environment. If not, we want - * to force replan. + * to force replan. (We could almost ignore this consideration when + * working from an analyzed parse tree; but there are scenarios where + * planning can have search_path-dependent results, for example if it + * inlines an old-style SQL function.) */ if (plansource->is_valid) { @@ -662,9 +748,9 @@ RevalidateCachedQuery(CachedPlanSource *plansource, } /* - * Discard the no-longer-useful query tree. (Note: we don't want to do - * this any earlier, else we'd not have been able to release locks - * correctly in the race condition case.) + * Discard the no-longer-useful rewritten query tree. (Note: we don't + * want to do this any earlier, else we'd not have been able to release + * locks correctly in the race condition case.) */ plansource->is_valid = false; plansource->query_list = NIL; @@ -711,25 +797,52 @@ RevalidateCachedQuery(CachedPlanSource *plansource, } /* - * Run parse analysis and rule rewriting. The parser tends to scribble on - * its input, so we must copy the raw parse tree to prevent corruption of - * the cache. + * Run parse analysis (if needed) and rule rewriting. */ - rawtree = copyObject(plansource->raw_parse_tree); - if (rawtree == NULL) - tlist = NIL; - else if (plansource->parserSetup != NULL) - tlist = pg_analyze_and_rewrite_withcb(rawtree, - plansource->query_string, - plansource->parserSetup, - plansource->parserSetupArg, - queryEnv); + if (plansource->raw_parse_tree != NULL) + { + /* Source is raw parse tree */ + RawStmt *rawtree; + + /* + * The parser tends to scribble on its input, so we must copy the raw + * parse tree to prevent corruption of the cache. + */ + rawtree = copyObject(plansource->raw_parse_tree); + if (plansource->parserSetup != NULL) + tlist = pg_analyze_and_rewrite_withcb(rawtree, + plansource->query_string, + plansource->parserSetup, + plansource->parserSetupArg, + queryEnv); + else + tlist = pg_analyze_and_rewrite_fixedparams(rawtree, + plansource->query_string, + plansource->param_types, + plansource->num_params, + queryEnv); + } + else if (plansource->analyzed_parse_tree != NULL) + { + /* Source is pre-analyzed query, so we only need to rewrite */ + Query *analyzed_tree; + + /* The rewriter scribbles on its input, too, so copy */ + analyzed_tree = copyObject(plansource->analyzed_parse_tree); + /* Acquire locks needed before rewriting ... */ + AcquireRewriteLocks(analyzed_tree, true, false); + /* ... and do it */ + tlist = pg_rewrite_query(analyzed_tree); + } else - tlist = pg_analyze_and_rewrite_fixedparams(rawtree, - plansource->query_string, - plansource->param_types, - plansource->num_params, - queryEnv); + { + /* Empty query, nothing to do */ + tlist = NIL; + } + + /* Apply post-rewrite callback if there is one */ + if (plansource->postRewrite != NULL) + plansource->postRewrite(tlist, plansource->postRewriteArg); /* Release snapshot if we got one */ if (snapshot_set) @@ -963,8 +1076,7 @@ BuildCachedPlan(CachedPlanSource *plansource, List *qlist, */ snapshot_set = false; if (!ActiveSnapshotSet() && - plansource->raw_parse_tree && - analyze_requires_snapshot(plansource->raw_parse_tree)) + BuildingPlanRequiresSnapshot(plansource)) { PushActiveSnapshot(GetTransactionSnapshot()); snapshot_set = true; @@ -1703,6 +1815,7 @@ CopyCachedPlan(CachedPlanSource *plansource) newsource = (CachedPlanSource *) palloc0(sizeof(CachedPlanSource)); newsource->magic = CACHEDPLANSOURCE_MAGIC; newsource->raw_parse_tree = copyObject(plansource->raw_parse_tree); + newsource->analyzed_parse_tree = copyObject(plansource->analyzed_parse_tree); newsource->query_string = pstrdup(plansource->query_string); MemoryContextSetIdentifier(source_context, newsource->query_string); newsource->commandTag = plansource->commandTag; @@ -1718,6 +1831,8 @@ CopyCachedPlan(CachedPlanSource *plansource) newsource->num_params = plansource->num_params; newsource->parserSetup = plansource->parserSetup; newsource->parserSetupArg = plansource->parserSetupArg; + newsource->postRewrite = plansource->postRewrite; + newsource->postRewriteArg = plansource->postRewriteArg; newsource->cursor_options = plansource->cursor_options; newsource->fixed_result = plansource->fixed_result; if (plansource->resultDesc) -- cgit v1.2.3