summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDavid Rowley <drowley@postgresql.org>2025-09-29 14:36:39 +1300
committerDavid Rowley <drowley@postgresql.org>2025-09-29 14:36:39 +1300
commit2cb49c609bd9bf44d319591df7bef1eb107cb375 (patch)
tree4776617a118d8d87d24ce40906094cb536bdd005 /src
parentacf0960c23090ea6690f2c5da3bc2625836516a9 (diff)
Improve planner's width estimates for set operations
For UNION, EXCEPT and INTERSECT, we were not very good at estimating the PathTarget.width for the set operation. Since the targetlist of the set operation is made up of Vars with varno==0, this would result in get_expr_width() applying a default estimate based on the Var's type rather than taking width estimates from any relation's statistics. Here we attempt to improve the situation by looking at the width estimates for the set operation child paths and calculating the average width of the relevant child paths weighted over the estimated number of rows. For UNION and INTERSECT, the relevant paths to look at are *all* child paths. For EXCEPT, since we don't return rows from the right-hand child (only possibly remove left-hand rows matching those), we use only the left-hand child for width estimates. This also adjusts the hashed-UNION Path's PathTarget to use the same PathTarget as its Append subpath. Both PathTargets will be the same and are void of any resjunk columns, per generate_append_tlist(). Making the AggPath use the same PathTarget saves having to adjust the "width" of the AggPath's PathTarget too. This was reported as a bug by sunw.fnst, but it's not something we ever claimed to do properly. Plus, if we were to adjust this in back branches, plans could change as the estimated input sizes to Sorts and Hash Aggregates could go up or down. Plan choices aren't something we want to destabilize in stable versions. Reported-by: sunw.fnst <936739278@qq.com> Author: David Rowley <drowleyml@gmail.com> Discussion: https://postgr.es/m/tencent_34CF8017AB81944A4C08DD089D410AB6C306@qq.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/optimizer/prep/prepunion.c59
1 files changed, 55 insertions, 4 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 28a4ae64440..6bd0f4a5dc3 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -23,6 +23,8 @@
*/
#include "postgres.h"
+#include <math.h>
+
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
@@ -74,6 +76,8 @@ static List *generate_append_tlist(List *colTypes, List *colCollations,
List *input_tlists,
List *refnames_tlist);
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);
+static PathTarget *create_setop_pathtarget(PlannerInfo *root, List *tlist,
+ List *child_pathlist);
/*
@@ -803,7 +807,8 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
/* Build result relation. */
result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
- result_rel->reltarget = create_pathtarget(root, tlist);
+ result_rel->reltarget = create_setop_pathtarget(root, tlist,
+ cheapest_pathlist);
result_rel->consider_parallel = consider_parallel;
result_rel->consider_startup = (root->tuple_fraction > 0);
@@ -892,7 +897,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
path = (Path *) create_agg_path(root,
result_rel,
apath,
- create_pathtarget(root, tlist),
+ result_rel->reltarget,
AGG_HASHED,
AGGSPLIT_SIMPLE,
groupList,
@@ -908,7 +913,7 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
path = (Path *) create_agg_path(root,
result_rel,
gpath,
- create_pathtarget(root, tlist),
+ result_rel->reltarget,
AGG_HASHED,
AGGSPLIT_SIMPLE,
groupList,
@@ -1130,7 +1135,18 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
/* Build result relation. */
result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
bms_union(lrel->relids, rrel->relids));
- result_rel->reltarget = create_pathtarget(root, tlist);
+
+ /*
+ * Create the PathTarget and set the width accordingly. For EXCEPT, since
+ * the set op result won't contain rows from the rpath, we only account
+ * for the width of the lpath. For INTERSECT, use both input paths.
+ */
+ if (op->op == SETOP_EXCEPT)
+ result_rel->reltarget = create_setop_pathtarget(root, tlist,
+ list_make1(lpath));
+ else
+ result_rel->reltarget = create_setop_pathtarget(root, tlist,
+ list_make2(lpath, rpath));
/*
* Estimate number of distinct groups that we'll need hashtable entries
@@ -1619,3 +1635,38 @@ generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
Assert(lg == NULL);
return grouplist;
}
+
+/*
+ * create_setop_pathtarget
+ * Do the normal create_pathtarget() work, plus set the resulting
+ * PathTarget's width to the average width of the Paths in child_pathlist
+ * weighted using the estimated row count of each path.
+ *
+ * Note: This is required because set op target lists use varno==0, which
+ * results in a type default width estimate rather than one that's based on
+ * statistics of the columns from the set op children.
+ */
+static PathTarget *
+create_setop_pathtarget(PlannerInfo *root, List *tlist, List *child_pathlist)
+{
+ PathTarget *reltarget;
+ ListCell *lc;
+ double parent_rows = 0;
+ double parent_size = 0;
+
+ reltarget = create_pathtarget(root, tlist);
+
+ /* Calculate the total rows and total size. */
+ foreach(lc, child_pathlist)
+ {
+ Path *path = (Path *) lfirst(lc);
+
+ parent_rows += path->rows;
+ parent_size += path->parent->reltarget->width * path->rows;
+ }
+
+ if (parent_rows > 0)
+ reltarget->width = rint(parent_size / parent_rows);
+
+ return reltarget;
+}