Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r--  src/backend/optimizer/path/costsize.c | 366
1 file changed, 339 insertions, 27 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 679d8ed597e..ec3c23013a3 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1756,6 +1756,322 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
}
/*
+ * is_fake_var
+ * Workaround for generate_append_tlist(), which generates fake Vars with
+ * varno == 0 that cause estimate_num_groups() to fail
+ *
+ * XXX Ummm, why would estimate_num_groups fail with this?
+ */
+static bool
+is_fake_var(Expr *expr)
+{
+ if (IsA(expr, RelabelType))
+ expr = (Expr *) ((RelabelType *) expr)->arg;
+
+ return (IsA(expr, Var) && ((Var *) expr)->varno == 0);
+}
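
For reference, a minimal sketch of what this predicate classifies (not part of
the patch; the Vars are hypothetical, built with the stock makeVar()
constructor the way generate_append_tlist() fabricates its entries):

	/* a Var with varno == 0, as fabricated by generate_append_tlist() */
	Var *fake = makeVar(0, 1, INT4OID, -1, InvalidOid, 0);

	Assert(is_fake_var((Expr *) fake));	/* fake: varno == 0 */
	Assert(!is_fake_var((Expr *) makeVar(1, 1, INT4OID, -1, InvalidOid, 0)));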
+
+/*
+ * get_width_cost_multiplier
+ * Returns the relative complexity of comparing two values based on their
+ * width. The idea is that the comparison becomes more expensive the longer
+ * the value is. The return value is in cpu_operator_cost units.
+ */
+static double
+get_width_cost_multiplier(PlannerInfo *root, Expr *expr)
+{
+ double width = -1.0; /* fake value */
+
+ if (IsA(expr, RelabelType))
+ expr = (Expr *) ((RelabelType *) expr)->arg;
+
+ /* Try to find actual stat in corresponding relation */
+ if (IsA(expr, Var))
+ {
+ Var *var = (Var *) expr;
+
+ if (var->varno > 0 && var->varno < root->simple_rel_array_size)
+ {
+ RelOptInfo *rel = root->simple_rel_array[var->varno];
+
+ if (rel != NULL &&
+ var->varattno >= rel->min_attr &&
+ var->varattno <= rel->max_attr)
+ {
+ int ndx = var->varattno - rel->min_attr;
+
+ if (rel->attr_widths[ndx] > 0)
+ width = rel->attr_widths[ndx];
+ }
+ }
+ }
+
+ /* Didn't find any actual stats, try using type width instead. */
+ if (width < 0.0)
+ {
+ Node *node = (Node*) expr;
+
+ width = get_typavgwidth(exprType(node), exprTypmod(node));
+ }
+
+ /*
+ * Values are passed as Datum type, so comparisons can't be cheaper than
+ * comparing a Datum value.
+ *
+ * FIXME I find this reasoning questionable. We may pass int2, and comparing
+ * it is probably a bit cheaper than comparing a bigint.
+ */
+ if (width <= sizeof(Datum))
+ return 1.0;
+
+ /*
+ * We consider the cost of a comparison not to be directly proportional to
+ * the width of the argument, because the widths of individual values may
+ * differ from the average (we only know the average width for the whole
+ * column). So we use a slowly growing logarithmic estimate instead:
+ * 1 + 0.125 * log2(width / sizeof(Datum)).
+ */
+ return 1.0 + 0.125 * LOG2(width / sizeof(Datum));
+}
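
To get a feel for the multiplier, here is a small standalone sketch (not part
of the patch) that replays the formula above for a few widths; the LOG2 macro
mirrors the costsize.c definition, and an 8-byte Datum is assumed:

	#include <stdio.h>
	#include <math.h>

	#define LOG2(x)  (log(x) / 0.693147180559945)
	#define DATUM_SIZE 8.0		/* sizeof(Datum) on 64-bit builds */

	int
	main(void)
	{
		double	widths[] = {4.0, 8.0, 16.0, 64.0, 1024.0};

		for (int i = 0; i < 5; i++)
		{
			double	w = widths[i];
			double	mult = (w <= DATUM_SIZE) ? 1.0 :
				1.0 + 0.125 * LOG2(w / DATUM_SIZE);

			/* prints 1.000, 1.000, 1.125, 1.375, 1.875 */
			printf("width %6.0f -> multiplier %.3f\n", w, mult);
		}
		return 0;
	}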
+
+/*
+ * compute_cpu_sort_cost
+ * compute CPU cost of sort (i.e. in-memory)
+ *
+ * The main thing we need to calculate to estimate sort CPU costs is the number
+ * of calls to the comparator functions. The difficulty is that for multi-column
+ * sorts there may be different data types involved (for some of which the calls
+ * may be much more expensive). Furthermore, columns may have a very different
+ * number of distinct values - the higher the number, the fewer comparisons will
+ * be needed for the following columns.
+ *
+ * The algorithm is incremental - we add pathkeys one by one, and at each step we
+ * estimate the number of necessary comparisons (based on the number of distinct
+ * groups in the current pathkey prefix and the new pathkey), and the comparison
+ * costs (which are data type specific).
+ *
+ * Estimation of the number of comparisons is based on ideas from:
+ *
+ * "Quicksort Is Optimal", Robert Sedgewick, Jon Bentley, 2002
+ * [https://www.cs.princeton.edu/~rs/talks/QuicksortIsOptimal.pdf]
+ *
+ * In terms of that paper, let N be the number of tuples and Xi the number
+ * of identical tuples with value Ki; then the estimated number of
+ * comparisons is:
+ *
+ * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi))
+ *
+ * We assume all Xi are the same, because we currently have no estimate of
+ * the group sizes - only of the number of groups (distinct values). In
+ * that case the formula becomes:
+ *
+ * N * log(NumberOfGroups)
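+ *
+ * For example, with N = 1000 tuples in 10 equally-sized groups (each
+ * Xi = 100), sum(Xi * log(N/Xi)) = 10 * 100 * log(10) = 1000 * log(10),
+ * which is exactly N * log(NumberOfGroups).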
+ *
+ * For multi-column sorts we need to estimate the number of comparisons for
+ * each individual column - for example, with columns (c1, c2, ..., ck) the
+ * number of comparisons on ck is roughly
+ *
+ * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1))
+ *
+ * Let k be a column number, Gk the number of groups defined by the first k
+ * columns, and Fk the per-comparison cost for column k's data type. The
+ * total comparison cost is then estimated as:
+ *
+ * N * sum( Fk * log(Gk) )
+ *
+ * Note: We also consider column width, not just the comparator cost.
+ *
+ * NOTE: some callers currently pass NIL for pathkeys because they can't
+ * conveniently supply the sort keys. In this case we fall back to a simple
+ * comparison cost estimate.
+ */
+static Cost
+compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+ Cost comparison_cost, double tuples, double output_tuples,
+ bool heapSort)
+{
+ Cost per_tuple_cost = 0.0;
+ ListCell *lc;
+ List *pathkeyExprs = NIL;
+ double tuplesPerPrevGroup = tuples;
+ double totalFuncCost = 1.0;
+ bool has_fake_var = false;
+ int i = 0;
+ Oid prev_datatype = InvalidOid;
+ List *cache_varinfos = NIL;
+
+ /* fall back if the pathkeys are unknown */
+ if (list_length(pathkeys) == 0)
+ {
+ /*
+ * If we use a bounded heap-sort keeping just K tuples in memory, the
+ * total number of tuple comparisons is N log2 K; the constant factor is
+ * a bit higher than for quicksort, tweaked so that the cost curve is
+ * continuous at the crossover point. Either way this matches the
+ * previous default of 2.0 * cpu_operator_cost per comparison.
+ */
+ output_tuples = (heapSort) ? 2.0 * output_tuples : tuples;
+ per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples);
+
+ /* add cost provided by caller */
+ per_tuple_cost += comparison_cost;
+
+ return per_tuple_cost * tuples;
+ }
+
+ /*
+ * Computing the total cost of sorting takes into account:
+ * - the per-column comparison function cost
+ * - the number of comparisons needed per column, which we estimate below
+ */
+ foreach(lc, pathkeys)
+ {
+ PathKey *pathkey = (PathKey*) lfirst(lc);
+ EquivalenceMember *em;
+ double nGroups,
+ correctedNGroups;
+ Cost funcCost = 1.0;
+
+ /*
+ * We believe that equivalence members aren't very different, so to
+ * estimate cost we consider just the first member.
+ */
+ em = (EquivalenceMember *) linitial(pathkey->pk_eclass->ec_members);
+
+ if (em->em_datatype != InvalidOid)
+ {
+ /* no need to look up funcCost again if the data type is unchanged */
+ if (prev_datatype != em->em_datatype)
+ {
+ Oid sortop;
+ QualCost cost;
+
+ sortop = get_opfamily_member(pathkey->pk_opfamily,
+ em->em_datatype, em->em_datatype,
+ pathkey->pk_strategy);
+
+ cost.startup = 0;
+ cost.per_tuple = 0;
+ add_function_cost(root, get_opcode(sortop), NULL, &cost);
+
+ /*
+ * add_function_cost returns the product of cpu_operator_cost
+ * and procost, but we need just procost, so undo that.
+ */
+ funcCost = cost.per_tuple / cpu_operator_cost;
+
+ prev_datatype = em->em_datatype;
+ }
+ }
+
+ /* factor in the width of the values in this column */
+ funcCost *= get_width_cost_multiplier(root, em->em_expr);
+
+ /* now we have per-key cost, so add to the running total */
+ totalFuncCost += funcCost;
+
+ /* remember if we have found a fake Var in pathkeys */
+ has_fake_var |= is_fake_var(em->em_expr);
+ pathkeyExprs = lappend(pathkeyExprs, em->em_expr);
+
+ /*
+ * We need to calculate the number of comparisons for this column, which
+ * requires knowing the group size. So we estimate the number of groups
+ * by calling estimate_num_groups_incremental(), which estimates the
+ * group size for "new" pathkeys.
+ *
+ * Note: estimate_num_groups_incremental() does not handle fake Vars, so
+ * fall back to a default estimate whenever we have seen one.
+ */
+ if (!has_fake_var)
+ nGroups = estimate_num_groups_incremental(root, pathkeyExprs,
+ tuplesPerPrevGroup, NULL, NULL,
+ &cache_varinfos,
+ list_length(pathkeyExprs) - 1);
+ else if (tuples > 4.0)
+ /*
+ * No stats available, so use a rough estimate built around sqrt(tuples),
+ * the geometric mean of 1 and tuples: e.g. for 10000 tuples and two
+ * pathkeys, the first column gets ceil(2.0 + 100 * 1/2) = 52 groups.
+ *
+ * We don't use DEFAULT_NUM_DISTINCT here, because that's used for
+ * a single column, but here we're dealing with multiple columns.
+ */
+ nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys));
+ else
+ nGroups = tuples;
+
+ /*
+ * Presorted keys are not charged a comparison-count term of their own,
+ * but the qsort comparator still compares them, so their function cost
+ * (accumulated into totalFuncCost above) is factored into the cost of
+ * each non-presorted key.
+ */
+ if (i >= nPresortedKeys)
+ {
+ if (heapSort)
+ {
+ /* have to keep at least one group, and a multiple of group size */
+ correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup);
+ }
+ else
+ /* all groups in the input */
+ correctedNGroups = nGroups;
+
+ correctedNGroups = Max(1.0, ceil(correctedNGroups));
+
+ per_tuple_cost += totalFuncCost * LOG2(correctedNGroups);
+ }
+
+ i++;
+
+ /*
+ * Uniform distributions with all groups being of the same size are the
+ * best case, with nice smooth behavior. Real-world distributions tend
+ * not to be uniform, though, and we don't have any reliable easy-to-use
+ * information. As a basic defense against skewed distributions, we use
+ * a 1.5 factor to make the expected group a bit larger (e.g. for 1000
+ * tuples in 10 groups we assume ceil(1.5 * 1000 / 10) = 150 tuples per
+ * group instead of 100), but we need to be careful not to make the
+ * group larger than in the preceding step.
+ */
+ tuplesPerPrevGroup = Min(tuplesPerPrevGroup,
+ ceil(1.5 * tuplesPerPrevGroup / nGroups));
+
+ /*
+ * Once we reach a single-row group, the tuples are effectively unique on
+ * this key prefix and we can skip all remaining columns.
+ */
+ if (tuplesPerPrevGroup <= 1.0)
+ break;
+ }
+
+ list_free(pathkeyExprs);
+
+ /* per_tuple_cost is in cpu_operator_cost units */
+ per_tuple_cost *= cpu_operator_cost;
+
+ /*
+ * According to "Introduction to Algorithms" (Thomas H. Cormen, Charles E.
+ * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0), the quicksort cost
+ * estimate has an additional term proportional to the number of tuples
+ * (see Chapter 8.2 and Theorem 4.1). That matters for low numbers of
+ * tuples, approximately fewer than 1e4. We could implement it as an
+ * additional multiplier under the logarithm, but we use a somewhat more
+ * complex formula that takes the number of unique tuples into account,
+ * and it's not clear how to combine that multiplier with the number of
+ * groups. So estimate this term as 10 cpu_operator_cost units.
+ */
+ per_tuple_cost += 10 * cpu_operator_cost;
+
+ per_tuple_cost += comparison_cost;
+
+ return tuples * per_tuple_cost;
+}
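
To make the per-column accounting above concrete, the following standalone
sketch (not part of the patch; the group counts and unit comparator costs are
invented) replays the loop for a two-column sort of one million tuples:

	#include <stdio.h>
	#include <math.h>

	#define LOG2(x)  (log(x) / 0.693147180559945)

	int
	main(void)
	{
		double	tuples = 1e6;
		double	funcCost[2] = {1.0, 1.0};		/* assumed Fk */
		double	nGroups[2] = {100.0, 10000.0};	/* assumed Gk */
		double	tuplesPerPrevGroup = tuples;
		double	totalFuncCost = 1.0;
		double	per_tuple_cost = 0.0;

		for (int k = 0; k < 2; k++)
		{
			totalFuncCost += funcCost[k];

			/* charge the accumulated comparator cost per comparison */
			per_tuple_cost += totalFuncCost * LOG2(nGroups[k]);

			/* shrink the expected group, with the 1.5 skew defense */
			tuplesPerPrevGroup = fmin(tuplesPerPrevGroup,
								ceil(1.5 * tuplesPerPrevGroup / nGroups[k]));
		}

		/* ~53.2 comparator units per tuple, in cpu_operator_cost terms */
		printf("per-tuple cost: %.2f\n", per_tuple_cost);
		return 0;
	}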
+
+/*
+ * Simple wrapper to estimate the cost of sorting by the given pathkeys.
+ */
+Cost
+cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys,
+ double tuples)
+{
+ return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys,
+ 0, tuples, tuples, false);
+}
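
A hypothetical caller might use this wrapper to compare candidate sort
orderings (sketch only; root, rel, pathkeys1 and pathkeys2 are assumed to be
set up elsewhere):

	Cost	cost1 = cost_sort_estimate(root, pathkeys1, 0, rel->rows);
	Cost	cost2 = cost_sort_estimate(root, pathkeys2, 0, rel->rows);
	List   *cheaper = (cost1 <= cost2) ? pathkeys1 : pathkeys2;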
+
+/*
* cost_tuplesort
* Determines and returns the cost of sorting a relation using tuplesort,
* not including the cost of reading the input data.
@@ -1771,7 +2087,7 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
* number of initial runs formed and M is the merge order used by tuplesort.c.
* Since the average initial run should be about sort_mem, we have
* disk traffic = 2 * relsize * ceil(logM(p / sort_mem))
- * cpu = comparison_cost * t * log2(t)
+ * and cpu cost (computed by compute_cpu_sort_cost()).
*
* If the sort is bounded (i.e., only the first k result tuples are needed)
* and k tuples can fit into sort_mem, we use a heap method that keeps only
@@ -1790,9 +2106,11 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm)
* 'comparison_cost' is the extra cost per comparison, if any
* 'sort_mem' is the number of kilobytes of work memory allowed for the sort
* 'limit_tuples' is the bound on the number of output tuples; -1 if no bound
+ * 'startup_cost' is expected to be 0 on input; any input cost should be
+ * added by the caller afterwards
*/
static void
-cost_tuplesort(Cost *startup_cost, Cost *run_cost,
+cost_tuplesort(PlannerInfo *root, List *pathkeys, Cost *startup_cost, Cost *run_cost,
double tuples, int width,
Cost comparison_cost, int sort_mem,
double limit_tuples)
@@ -1809,9 +2127,6 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
if (tuples < 2.0)
tuples = 2.0;
- /* Include the default cost-per-comparison */
- comparison_cost += 2.0 * cpu_operator_cost;
-
/* Do we have a useful LIMIT? */
if (limit_tuples > 0 && limit_tuples < tuples)
{
@@ -1835,12 +2150,10 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
double log_runs;
double npageaccesses;
- /*
- * CPU costs
- *
- * Assume about N log2 N comparisons
- */
- *startup_cost = comparison_cost * tuples * LOG2(tuples);
+ /* CPU costs */
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ tuples, false);
/* Disk costs */
@@ -1856,18 +2169,17 @@ cost_tuplesort(Cost *startup_cost, Cost *run_cost,
}
else if (tuples > 2 * output_tuples || input_bytes > sort_mem_bytes)
{
- /*
- * We'll use a bounded heap-sort keeping just K tuples in memory, for
- * a total number of tuple comparisons of N log2 K; but the constant
- * factor is a bit higher than for quicksort. Tweak it so that the
- * cost curve is continuous at the crossover point.
- */
- *startup_cost = comparison_cost * tuples * LOG2(2.0 * output_tuples);
+ /* We'll use a bounded heap-sort keeping just K tuples in memory. */
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ output_tuples, true);
}
else
{
/* We'll use plain quicksort on all the input tuples */
- *startup_cost = comparison_cost * tuples * LOG2(tuples);
+ *startup_cost = compute_cpu_sort_cost(root, pathkeys, 0,
+ comparison_cost, tuples,
+ tuples, false);
}
/*
@@ -1900,8 +2212,8 @@ cost_incremental_sort(Path *path,
double input_tuples, int width, Cost comparison_cost, int sort_mem,
double limit_tuples)
{
- Cost startup_cost = 0,
- run_cost = 0,
+ Cost startup_cost,
+ run_cost,
input_run_cost = input_total_cost - input_startup_cost;
double group_tuples,
input_groups;
@@ -1986,7 +2298,7 @@ cost_incremental_sort(Path *path,
* pessimistic about incremental sort performance and increase its average
* group size by half.
*/
- cost_tuplesort(&group_startup_cost, &group_run_cost,
+ cost_tuplesort(root, pathkeys, &group_startup_cost, &group_run_cost,
1.5 * group_tuples, width, comparison_cost, sort_mem,
limit_tuples);
@@ -1994,7 +2306,7 @@ cost_incremental_sort(Path *path,
* Startup cost of incremental sort is the startup cost of its first group
* plus the cost of its input.
*/
- startup_cost += group_startup_cost
+ startup_cost = group_startup_cost
+ input_startup_cost + group_input_run_cost;
/*
@@ -2003,7 +2315,7 @@ cost_incremental_sort(Path *path,
* group, plus the total cost to process the remaining groups, plus the
* remaining cost of input.
*/
- run_cost += group_run_cost
+ run_cost = group_run_cost
+ (group_run_cost + group_startup_cost) * (input_groups - 1)
+ group_input_run_cost * (input_groups - 1);
@@ -2043,7 +2355,7 @@ cost_sort(Path *path, PlannerInfo *root,
Cost startup_cost;
Cost run_cost;
- cost_tuplesort(&startup_cost, &run_cost,
+ cost_tuplesort(root, pathkeys, &startup_cost, &run_cost,
tuples, width,
comparison_cost, sort_mem,
limit_tuples);
@@ -2141,7 +2453,7 @@ append_nonpartial_cost(List *subpaths, int numpaths, int parallel_workers)
* Determines and returns the cost of an Append node.
*/
void
-cost_append(AppendPath *apath)
+cost_append(AppendPath *apath, PlannerInfo *root)
{
ListCell *l;
@@ -2209,7 +2521,7 @@ cost_append(AppendPath *apath)
* any child.
*/
cost_sort(&sort_path,
- NULL, /* doesn't currently need root */
+ root,
pathkeys,
subpath->total_cost,
subpath->rows,