diff options
author | David Rowley <drowley@postgresql.org> | 2025-09-29 14:36:39 +1300 |
---|---|---|
committer | David Rowley <drowley@postgresql.org> | 2025-09-29 14:36:39 +1300 |
commit | 2cb49c609bd9bf44d319591df7bef1eb107cb375 (patch) | |
tree | 4776617a118d8d87d24ce40906094cb536bdd005 /src | |
parent | acf0960c23090ea6690f2c5da3bc2625836516a9 (diff) |
Improve planner's width estimates for set operations
For UNION, EXCEPT and INTERSECT, we were not very good at estimating the
PathTarget.width for the set operation. Since the targetlist of the set
operation is made up of Vars with varno==0, this would result in
get_expr_width() applying a default estimate based on the Var's type
rather than taking width estimates from any relation's statistics.
Here we attempt to improve the situation by looking at the width estimates
for the set operation child paths and calculating the average width of the
relevant child paths weighted over the estimated number of rows. For
UNION and INTERSECT, the relevant paths to look at are *all* child paths.
For EXCEPT, since we don't return rows from the right-hand child (only
possibly remove left-hand rows matching those), we use only the left-hand
child for width estimates.
This also adjusts the hashed-UNION Path's PathTarget to use the same
PathTarget as its Append subpath. Both PathTargets will be the same and
are void of any resjunk columns, per generate_append_tlist(). Making
the AggPath use the same PathTarget saves having to adjust the "width"
of the AggPath's PathTarget too.
This was reported as a bug by sunw.fnst, but it's not something we ever
claimed to do properly. Plus, if we were to adjust this in back
branches, plans could change as the estimated input sizes to Sorts and
Hash Aggregates could go up or down. Plan choices aren't something we
want to destabilize in stable versions.
Reported-by: sunw.fnst <936739278@qq.com>
Author: David Rowley <drowleyml@gmail.com>
Discussion: https://postgr.es/m/tencent_34CF8017AB81944A4C08DD089D410AB6C306@qq.com
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/prep/prepunion.c | 59 |
1 files changed, 55 insertions, 4 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 28a4ae64440..6bd0f4a5dc3 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -23,6 +23,8 @@ */ #include "postgres.h" +#include <math.h> + #include "access/htup_details.h" #include "catalog/pg_type.h" #include "miscadmin.h" @@ -74,6 +76,8 @@ static List *generate_append_tlist(List *colTypes, List *colCollations, List *input_tlists, List *refnames_tlist); static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist); +static PathTarget *create_setop_pathtarget(PlannerInfo *root, List *tlist, + List *child_pathlist); /* @@ -803,7 +807,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, /* Build result relation. */ result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids); - result_rel->reltarget = create_pathtarget(root, tlist); + result_rel->reltarget = create_setop_pathtarget(root, tlist, + cheapest_pathlist); result_rel->consider_parallel = consider_parallel; result_rel->consider_startup = (root->tuple_fraction > 0); @@ -892,7 +897,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, path = (Path *) create_agg_path(root, result_rel, apath, - create_pathtarget(root, tlist), + result_rel->reltarget, AGG_HASHED, AGGSPLIT_SIMPLE, groupList, @@ -908,7 +913,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root, path = (Path *) create_agg_path(root, result_rel, gpath, - create_pathtarget(root, tlist), + result_rel->reltarget, AGG_HASHED, AGGSPLIT_SIMPLE, groupList, @@ -1130,7 +1135,18 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root, /* Build result relation. */ result_rel = fetch_upper_rel(root, UPPERREL_SETOP, bms_union(lrel->relids, rrel->relids)); - result_rel->reltarget = create_pathtarget(root, tlist); + + /* + * Create the PathTarget and set the width accordingly. For EXCEPT, since + * the set op result won't contain rows from the rpath, we only account + * for the width of the lpath. For INTERSECT, use both input paths. + */ + if (op->op == SETOP_EXCEPT) + result_rel->reltarget = create_setop_pathtarget(root, tlist, + list_make1(lpath)); + else + result_rel->reltarget = create_setop_pathtarget(root, tlist, + list_make2(lpath, rpath)); /* * Estimate number of distinct groups that we'll need hashtable entries @@ -1619,3 +1635,38 @@ generate_setop_grouplist(SetOperationStmt *op, List *targetlist) Assert(lg == NULL); return grouplist; } + +/* + * create_setop_pathtarget + * Do the normal create_pathtarget() work, plus set the resulting + * PathTarget's width to the average width of the Paths in child_pathlist + * weighted using the estimated row count of each path. + * + * Note: This is required because set op target lists use varno==0, which + * results in a type default width estimate rather than one that's based on + * statistics of the columns from the set op children. + */ +static PathTarget * +create_setop_pathtarget(PlannerInfo *root, List *tlist, List *child_pathlist) +{ + PathTarget *reltarget; + ListCell *lc; + double parent_rows = 0; + double parent_size = 0; + + reltarget = create_pathtarget(root, tlist); + + /* Calculate the total rows and total size. */ + foreach(lc, child_pathlist) + { + Path *path = (Path *) lfirst(lc); + + parent_rows += path->rows; + parent_size += path->parent->reltarget->width * path->rows; + } + + if (parent_rows > 0) + reltarget->width = rint(parent_size / parent_rows); + + return reltarget; +} |