summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2005-01-28 20:34:27 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2005-01-28 20:34:27 +0000
commit875b0c62fabeafd65136ad5d6825274f983b8e88 (patch)
tree680fd5be04cdea754b90c7c21a7284621498b44b
parent0bf2587df46f3b554d62f7628d1d474bda7ddfc5 (diff)
When dealing with multiple grouping columns coming from the same table,
clamp the estimated number of groups to table row count over 10, instead of table row count; this reflects a heuristic that people probably won't group over a near-unique set of columns, and the knowledge that we don't currently have any way to estimate the correlation of the columns better than guessing. This change creates a trivial plan change in one of the regression tests.
-rw-r--r--src/backend/utils/adt/selfuncs.c33
-rw-r--r--src/test/regress/expected/subselect.out4
2 files changed, 27 insertions, 10 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index c556f4bc6ec..7b020ff4895 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.169 2004/12/31 22:01:22 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.170 2005/01/28 20:34:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1937,10 +1937,13 @@ add_unique_group_var(Query *root, List *varinfos,
* if we considered ones of the same rel, we'd be double-counting the
* restriction selectivity of the equality in the next step.
* 3. For Vars within a single source rel, we multiply together the numbers
- * of values, clamp to the number of rows in the rel, and then multiply
- * by the selectivity of the restriction clauses for that rel. The
- * initial product is probably too high (it's the worst case) but since
- * we can clamp to the rel's rows it won't be hugely bad. Multiplying
+ * of values, clamp to the number of rows in the rel (divided by 10 if
+ * more than one Var), and then multiply by the selectivity of the
+ * restriction clauses for that rel. When there's more than one Var,
+ * the initial product is probably too high (it's the worst case) but
+ * clamping to a fraction of the rel's rows seems to be a helpful
+ * heuristic for not letting the estimate get out of hand. (The factor
+ * of 10 is derived from pre-Postgres-7.4 practice.) Multiplying
* by the restriction selectivity is effectively assuming that the
* restriction clauses are independent of the grouping, which is a crummy
* assumption, but it's hard to do better.
@@ -2040,6 +2043,7 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
RelOptInfo *rel = varinfo1->rel;
double reldistinct = varinfo1->ndistinct;
+ int relvarcount = 1;
List *newvarinfos = NIL;
/*
@@ -2051,7 +2055,10 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
if (varinfo2->rel == varinfo1->rel)
+ {
reldistinct *= varinfo2->ndistinct;
+ relvarcount++;
+ }
else
{
/* not time to process varinfo2 yet */
@@ -2066,10 +2073,20 @@ estimate_num_groups(Query *root, List *groupExprs, double input_rows)
if (rel->tuples > 0)
{
/*
- * Clamp to size of rel, multiply by restriction selectivity.
+ * Clamp to size of rel, or size of rel / 10 if multiple Vars.
+ * The fudge factor is because the Vars are probably correlated
+ * but we don't know by how much.
+ */
+ double clamp = rel->tuples;
+
+ if (relvarcount > 1)
+ clamp *= 0.1;
+ if (reldistinct > clamp)
+ reldistinct = clamp;
+
+ /*
+ * Multiply by restriction selectivity.
*/
- if (reldistinct > rel->tuples)
- reldistinct = rel->tuples;
reldistinct *= rel->rows / rel->tuples;
/*
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 07e727de482..56bea035981 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -134,11 +134,11 @@ SELECT '' AS five, f1 AS "Correlated Field"
WHERE f3 IS NOT NULL);
five | Correlated Field
------+------------------
- | 2
| 3
| 1
- | 2
| 3
+ | 2
+ | 2
(5 rows)
--