diff options
Diffstat (limited to 'src/backend/optimizer/path/costsize.c')
-rw-r--r-- | src/backend/optimizer/path/costsize.c | 111 |
1 files changed, 58 insertions, 53 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 6673d271c26..ed98ba7dbd2 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1794,7 +1794,7 @@ is_fake_var(Expr *expr) static double get_width_cost_multiplier(PlannerInfo *root, Expr *expr) { - double width = -1.0; /* fake value */ + double width = -1.0; /* fake value */ if (IsA(expr, RelabelType)) expr = (Expr *) ((RelabelType *) expr)->arg; @@ -1802,17 +1802,17 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr) /* Try to find actual stat in corresponding relation */ if (IsA(expr, Var)) { - Var *var = (Var *) expr; + Var *var = (Var *) expr; if (var->varno > 0 && var->varno < root->simple_rel_array_size) { - RelOptInfo *rel = root->simple_rel_array[var->varno]; + RelOptInfo *rel = root->simple_rel_array[var->varno]; if (rel != NULL && var->varattno >= rel->min_attr && var->varattno <= rel->max_attr) { - int ndx = var->varattno - rel->min_attr; + int ndx = var->varattno - rel->min_attr; if (rel->attr_widths[ndx] > 0) width = rel->attr_widths[ndx]; @@ -1823,7 +1823,7 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr) /* Didn't find any actual stats, try using type width instead. */ if (width < 0.0) { - Node *node = (Node*) expr; + Node *node = (Node *) expr; width = get_typavgwidth(exprType(node), exprTypmod(node)); } @@ -1832,17 +1832,17 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr) * Values are passed as Datum type, so comparisons can't be cheaper than * comparing a Datum value. * - * FIXME I find this reasoning questionable. We may pass int2, and comparing - * it is probably a bit cheaper than comparing a bigint. + * FIXME I find this reasoning questionable. We may pass int2, and + * comparing it is probably a bit cheaper than comparing a bigint. */ if (width <= sizeof(Datum)) return 1.0; /* * We consider the cost of a comparison not to be directly proportional to - * width of the argument, because widths of the arguments could be slightly - * different (we only know the average width for the whole column). So we - * use log16(width) as an estimate. + * width of the argument, because widths of the arguments could be + * slightly different (we only know the average width for the whole + * column). So we use log16(width) as an estimate. */ return 1.0 + 0.125 * LOG2(width / sizeof(Datum)); } @@ -1902,23 +1902,23 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, bool heapSort) { Cost per_tuple_cost = 0.0; - ListCell *lc; - List *pathkeyExprs = NIL; + ListCell *lc; + List *pathkeyExprs = NIL; double tuplesPerPrevGroup = tuples; double totalFuncCost = 1.0; bool has_fake_var = false; int i = 0; Oid prev_datatype = InvalidOid; - List *cache_varinfos = NIL; + List *cache_varinfos = NIL; /* fallback if pathkeys is unknown */ if (list_length(pathkeys) == 0) { /* - * If we'll use a bounded heap-sort keeping just K tuples in memory, for - * a total number of tuple comparisons of N log2 K; but the constant - * factor is a bit higher than for quicksort. Tweak it so that the cost - * curve is continuous at the crossover point. + * If we'll use a bounded heap-sort keeping just K tuples in memory, + * for a total number of tuple comparisons of N log2 K; but the + * constant factor is a bit higher than for quicksort. Tweak it so + * that the cost curve is continuous at the crossover point. */ output_tuples = (heapSort) ? 2.0 * output_tuples : tuples; per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples); @@ -1930,17 +1930,17 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, } /* - * Computing total cost of sorting takes into account: - * - per column comparison function cost - * - we try to compute needed number of comparison per column + * Computing total cost of sorting takes into account the per-column + * comparison function cost. We try to compute the needed number of + * comparisons per column. */ foreach(lc, pathkeys) { - PathKey *pathkey = (PathKey*) lfirst(lc); - EquivalenceMember *em; - double nGroups, - correctedNGroups; - Cost funcCost = 1.0; + PathKey *pathkey = (PathKey *) lfirst(lc); + EquivalenceMember *em; + double nGroups, + correctedNGroups; + Cost funcCost = 1.0; /* * We believe that equivalence members aren't very different, so, to @@ -1985,10 +1985,10 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, pathkeyExprs = lappend(pathkeyExprs, em->em_expr); /* - * We need to calculate the number of comparisons for this column, which - * requires knowing the group size. So we estimate the number of groups - * by calling estimate_num_groups_incremental(), which estimates the - * group size for "new" pathkeys. + * We need to calculate the number of comparisons for this column, + * which requires knowing the group size. So we estimate the number of + * groups by calling estimate_num_groups_incremental(), which + * estimates the group size for "new" pathkeys. * * Note: estimate_num_groups_incremental does not handle fake Vars, so * use a default estimate otherwise. @@ -1999,26 +1999,30 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, &cache_varinfos, list_length(pathkeyExprs) - 1); else if (tuples > 4.0) + /* * Use geometric mean as estimation if there are no stats. * - * We don't use DEFAULT_NUM_DISTINCT here, because that’s used for - * a single column, but here we’re dealing with multiple columns. + * We don't use DEFAULT_NUM_DISTINCT here, because that's used for + * a single column, but here we're dealing with multiple columns. */ nGroups = ceil(2.0 + sqrt(tuples) * (i + 1) / list_length(pathkeys)); else nGroups = tuples; /* - * Presorted keys are not considered in the cost above, but we still do - * have to compare them in the qsort comparator. So make sure to factor - * in the cost in that case. + * Presorted keys are not considered in the cost above, but we still + * do have to compare them in the qsort comparator. So make sure to + * factor in the cost in that case. */ if (i >= nPresortedKeys) { if (heapSort) { - /* have to keep at least one group, and a multiple of group size */ + /* + * have to keep at least one group, and a multiple of group + * size + */ correctedNGroups = ceil(output_tuples / tuplesPerPrevGroup); } else @@ -2033,19 +2037,20 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, i++; /* - * Uniform distributions with all groups being of the same size are the - * best case, with nice smooth behavior. Real-world distributions tend - * not to be uniform, though, and we don’t have any reliable easy-to-use - * information. As a basic defense against skewed distributions, we use - * a 1.5 factor to make the expected group a bit larger, but we need to - * be careful not to make the group larger than in the preceding step. + * Uniform distributions with all groups being of the same size are + * the best case, with nice smooth behavior. Real-world distributions + * tend not to be uniform, though, and we don't have any reliable + * easy-to-use information. As a basic defense against skewed + * distributions, we use a 1.5 factor to make the expected group a bit + * larger, but we need to be careful not to make the group larger than + * in the preceding step. */ tuplesPerPrevGroup = Min(tuplesPerPrevGroup, ceil(1.5 * tuplesPerPrevGroup / nGroups)); /* - * Once we get single-row group, it means tuples in the group are unique - * and we can skip all remaining columns. + * Once we get single-row group, it means tuples in the group are + * unique and we can skip all remaining columns. */ if (tuplesPerPrevGroup <= 1.0) break; @@ -2057,15 +2062,15 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, per_tuple_cost *= cpu_operator_cost; /* - * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles E. - * Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort estimation - * formula has additional term proportional to number of tuples (See Chapter - * 8.2 and Theorem 4.1). That affects cases with a low number of tuples, - * approximately less than 1e4. We could implement it as an additional - * multiplier under the logarithm, but we use a bit more complex formula - * which takes into account the number of unique tuples and it’s not clear - * how to combine the multiplier with the number of groups. Estimate it as - * 10 in cpu_operator_cost unit. + * Accordingly to "Introduction to algorithms", Thomas H. Cormen, Charles + * E. Leiserson, Ronald L. Rivest, ISBN 0-07-013143-0, quicksort + * estimation formula has additional term proportional to number of tuples + * (see Chapter 8.2 and Theorem 4.1). That affects cases with a low number + * of tuples, approximately less than 1e4. We could implement it as an + * additional multiplier under the logarithm, but we use a bit more + * complex formula which takes into account the number of unique tuples + * and it's not clear how to combine the multiplier with the number of + * groups. Estimate it as 10 cpu_operator_cost units. */ per_tuple_cost += 10 * cpu_operator_cost; @@ -2082,7 +2087,7 @@ cost_sort_estimate(PlannerInfo *root, List *pathkeys, int nPresortedKeys, double tuples) { return compute_cpu_sort_cost(root, pathkeys, nPresortedKeys, - 0, tuples, tuples, false); + 0, tuples, tuples, false); } /* |