Teach UNION planner to remove dummy inputsHEAD origin/master origin/HEAD master

This adjusts UNION planning so that the planner produces more optimal plans when one or more of the UNION's subqueries have been proven to be empty (a dummy rel). If any of the inputs are empty, then that input can be removed from the Append / MergeAppend. Previously, a const-false "Result" node would appear to represent this. Removing empty inputs has a few extra benefits when only 1 union child remains as it means the Append or MergeAppend can be removed in setrefs.c, making the plan slightly faster to execute. Also, we can provide better n_distinct estimates by looking at the sole remaining input rel's statistics. Author: David Rowley <dgrowleyml@gmail.com> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Discussion: https://postgr.es/m/CAApHDvri53PPF76c3M94_QNWbJfXjyCnjXuj_2=LYM-0m8WZtw@mail.gmail.com
author: David Rowley <drowley@postgresql.org> 2025-10-04 14:30:03 +1300
committer: David Rowley <drowley@postgresql.org> 2025-10-04 14:30:03 +1300
commit: 03d40e4b523b2075c198aa89d9445ea8297f8eca (patch)
tree: a94bd60a859a868e19f7c068d88dc4ad16fd9ef3 /src/backend/optimizer/prep
parent: 5092aae431e3e1a20324ea3a42a181c63f703d0d (diff)
1 files changed, 47 insertions, 9 deletions
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 6c0e2383af9..547dbd53540 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -523,6 +523,13 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel,
 		bool		is_sorted;
 		int			presorted_keys;
 
+		/* If the input rel is dummy, propagate that to this query level */
+		if (is_dummy_rel(final_rel))
+		{
+			mark_dummy_rel(rel);
+			continue;
+		}
+
 		/*
 		 * Include the cheapest path as-is so that the set operation can be
 		 * cheaply implemented using a method which does not require the input
@@ -763,6 +770,10 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		RelOptInfo *rel = lfirst(lc);
 		Path	   *ordered_path;
 
+		/* Skip any UNION children that are proven not to yield any rows */
+		if (is_dummy_rel(rel))
+			continue;
+
 		cheapest_pathlist = lappend(cheapest_pathlist,
 									rel->cheapest_total_path);
 
@@ -812,6 +823,15 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 	result_rel->consider_parallel = consider_parallel;
 	result_rel->consider_startup = (root->tuple_fraction > 0);
 
+	/* If all UNION children were dummy rels, make the resulting rel dummy */
+	if (cheapest_pathlist == NIL)
+	{
+		result_rel->reltarget = create_pathtarget(root, list_nth(tlist_list, 0));
+		mark_dummy_rel(result_rel);
+
+		return result_rel;
+	}
+
 	/*
 	 * Append the child results together using the cheapest paths from each
 	 * union child.
@@ -876,15 +896,33 @@ generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
 		bool		can_sort = grouping_is_sortable(groupList);
 		bool		can_hash = grouping_is_hashable(groupList);
 
-		/*
-		 * XXX for the moment, take the number of distinct groups as equal to
-		 * the total input size, i.e., the worst case.  This is too
-		 * conservative, but it's not clear how to get a decent estimate of
-		 * the true size.  One should note as well the propensity of novices
-		 * to write UNION rather than UNION ALL even when they don't expect
-		 * any duplicates...
-		 */
-		dNumGroups = apath->rows;
+		if (list_length(cheapest_pathlist) == 1)
+		{
+			Path	   *path = linitial(cheapest_pathlist);
+
+			/*
+			 * In the case where only one union child remains due to the
+			 * detection of one or more dummy union children, obtain an
+			 * estimate on the surviving child directly.
+			 */
+			dNumGroups = estimate_num_groups(root,
+											 path->pathtarget->exprs,
+											 path->rows,
+											 NULL,
+											 NULL);
+		}
+		else
+		{
+			/*
+			 * Otherwise, for the moment, take the number of distinct groups
+			 * as equal to the total input size, i.e., the worst case.  This
+			 * is too conservative, but it's not clear how to get a decent
+			 * estimate of the true size.  One should note as well the
+			 * propensity of novices to write UNION rather than UNION ALL even
+			 * when they don't expect any duplicates...
+			 */
+			dNumGroups = apath->rows;
+		}
 
 		if (can_hash)
 		{
author	David Rowley <drowley@postgresql.org>	2025-10-04 14:30:03 +1300
committer	David Rowley <drowley@postgresql.org>	2025-10-04 14:30:03 +1300
commit	03d40e4b523b2075c198aa89d9445ea8297f8eca (patch)
tree	a94bd60a859a868e19f7c068d88dc4ad16fd9ef3 /src/backend/optimizer/prep
parent	5092aae431e3e1a20324ea3a42a181c63f703d0d (diff)