summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
authorDavid Rowley <drowley@postgresql.org>2025-07-29 15:18:01 +1200
committerDavid Rowley <drowley@postgresql.org>2025-07-29 15:18:01 +1200
commit4bc62b86849065939a6b85273fece6b92d6e97bf (patch)
tree38fbd1181e3622c1d28dbd452be375946d4a27fb /src/backend/optimizer/path
parent71c0921b649d7a800eb2d6f93539890eaa14d979 (diff)
Display Memoize planner estimates in EXPLAIN
There've been a few complaints that it can be overly difficult to figure out why the planner picked a Memoize plan. To help address that, here we adjust the EXPLAIN output to display the following additional details: 1) The estimated number of cache entries that can be stored at once 2) The estimated number of unique lookup keys that we expect to see 3) The number of lookups we expect 4) The estimated hit ratio Technically #4 can be calculated using #1, #2 and #3, but it's not a particularly obvious calculation, so we opt to display it explicitly. The original patch by Lukas Fittl only displayed the hit ratio, but there was a fear that might lead to more questions about how that was calculated. The idea with displaying all 4 is to be transparent which may allow queries to be tuned more easily. For example, if #2 isn't correct then maybe extended statistics or a manual n_distinct estimate can be used to help fix poor plan choices. Author: Ilia Evdokimov <ilya.evdokimov@tantorlabs.com> Author: Lukas Fittl <lukas@fittl.com> Reviewed-by: David Rowley <dgrowleyml@gmail.com> Reviewed-by: Andrei Lepikhov <lepihov@gmail.com> Reviewed-by: Robert Haas <robertmhaas@gmail.com> Discussion: https://postgr.es/m/CAP53Pky29GWAVVk3oBgKBDqhND0BRBN6yTPeguV_qSivFL5N_g%40mail.gmail.com
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/costsize.c18
1 files changed, 12 insertions, 6 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1f04a2c182c..344a3188317 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -2572,13 +2572,13 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
Cost input_startup_cost = mpath->subpath->startup_cost;
Cost input_total_cost = mpath->subpath->total_cost;
double tuples = mpath->subpath->rows;
- double calls = mpath->calls;
+ Cardinality est_calls = mpath->est_calls;
int width = mpath->subpath->pathtarget->width;
double hash_mem_bytes;
double est_entry_bytes;
- double est_cache_entries;
- double ndistinct;
+ Cardinality est_cache_entries;
+ Cardinality ndistinct;
double evict_ratio;
double hit_ratio;
Cost startup_cost;
@@ -2604,7 +2604,7 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
est_cache_entries = floor(hash_mem_bytes / est_entry_bytes);
/* estimate on the distinct number of parameter values */
- ndistinct = estimate_num_groups(root, mpath->param_exprs, calls, NULL,
+ ndistinct = estimate_num_groups(root, mpath->param_exprs, est_calls, NULL,
&estinfo);
/*
@@ -2616,7 +2616,10 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
* certainly mean a MemoizePath will never survive add_path().
*/
if ((estinfo.flags & SELFLAG_USED_DEFAULT) != 0)
- ndistinct = calls;
+ ndistinct = est_calls;
+
+ /* Remember the ndistinct estimate for EXPLAIN */
+ mpath->est_unique_keys = ndistinct;
/*
* Since we've already estimated the maximum number of entries we can
@@ -2644,9 +2647,12 @@ cost_memoize_rescan(PlannerInfo *root, MemoizePath *mpath,
* must look at how many scans are estimated in total for this node and
* how many of those scans we expect to get a cache hit.
*/
- hit_ratio = ((calls - ndistinct) / calls) *
+ hit_ratio = ((est_calls - ndistinct) / est_calls) *
(est_cache_entries / Max(ndistinct, est_cache_entries));
+ /* Remember the hit ratio estimate for EXPLAIN */
+ mpath->est_hit_ratio = hit_ratio;
+
Assert(hit_ratio >= 0 && hit_ratio <= 1.0);
/*