From 022cd0bfd33968f2b004106cfeaa3b2951e7f322 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 5 Jun 2020 16:18:50 -0400 Subject: Use query collation, not column's collation, while examining statistics. Commit 5e0928005 changed the planner so that, instead of blindly using DEFAULT_COLLATION_OID when invoking operators for selectivity estimation, it would use the collation of the column whose statistics we're considering. This was recognized as still being not quite the right thing, but it seemed like a good incremental improvement. However, shortly thereafter we introduced nondeterministic collations, and that creates cases where operators can fail if they're passed the wrong collation. We don't want planning to fail in cases where the query itself would work, so this means that we *must* use the query's collation when invoking operators for estimation purposes. The only real problem this creates is in ineq_histogram_selectivity, where the binary search might produce a garbage answer if we perform comparisons using a different collation than the column's histogram is ordered with. However, when the query's collation is significantly different from the column's default collation, the estimate we previously generated would be pretty irrelevant anyway; so it's not clear that this will result in noticeably worse estimates in practice. (A follow-on patch will improve this situation in HEAD, but it seems too invasive for back-patch.) The patch requires changing the signatures of mcv_selectivity and allied functions, which are exported and very possibly are used by extensions. In HEAD, I just did that, but an API/ABI break of this sort isn't acceptable in stable branches. Therefore, in v12 the patch introduces "mcv_selectivity_ext" and so on, with signatures matching HEAD, and makes the old functions into wrappers that assume DEFAULT_COLLATION_OID should be used. That does not match the prior behavior, but it should avoid risk of failure in most cases. (In practice, I think most extension datatypes aren't collation-aware, so the change probably doesn't matter to them.) Per report from James Lucas. Back-patch to v12 where the problem was introduced. Discussion: https://postgr.es/m/CAAFmbbOvfi=wMM=3qRsPunBSLb8BFREno2oOzSBS=mzfLPKABw@mail.gmail.com --- src/backend/utils/adt/like_support.c | 63 +++++++++++++++++------------------- 1 file changed, 30 insertions(+), 33 deletions(-) (limited to 'src/backend/utils/adt/like_support.c') diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c index 77cc3781965..6465c9edcc3 100644 --- a/src/backend/utils/adt/like_support.c +++ b/src/backend/utils/adt/like_support.c @@ -90,7 +90,9 @@ static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt, Selectivity *rest_selec); static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, - Oid vartype, Oid opfamily, Const *prefixcon); + Oid vartype, Oid opfamily, + Oid collation, + Const *prefixcon); static Selectivity like_selectivity(const char *patt, int pattlen, bool case_insensitive); static Selectivity regex_selectivity(const char *patt, int pattlen, @@ -586,8 +588,8 @@ patternsel_common(PlannerInfo *root, if (eqopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); - result = var_eq_const(&vardata, eqopr, prefix->constvalue, - false, true, false); + result = var_eq_const_ext(&vardata, eqopr, collation, + prefix->constvalue, false, true, false); } else { @@ -618,8 +620,9 @@ patternsel_common(PlannerInfo *root, opfuncid = get_opcode(oprid); fmgr_info(opfuncid, &opproc); - selec = histogram_selectivity(&vardata, &opproc, constval, true, - 10, 1, &hist_size); + selec = histogram_selectivity_ext(&vardata, &opproc, collation, + constval, true, + 10, 1, &hist_size); /* If not at least 100 entries, use the heuristic method */ if (hist_size < 100) @@ -629,7 +632,7 @@ patternsel_common(PlannerInfo *root, if (pstatus == Pattern_Prefix_Partial) prefixsel = prefix_selectivity(root, &vardata, vartype, - opfamily, prefix); + opfamily, collation, prefix); else prefixsel = 1.0; heursel = prefixsel * rest_selec; @@ -661,8 +664,9 @@ patternsel_common(PlannerInfo *root, * directly to the result selectivity. Also add up the total fraction * represented by MCV entries. */ - mcv_selec = mcv_selectivity(&vardata, &opproc, constval, true, - &sumcommon); + mcv_selec = mcv_selectivity_ext(&vardata, &opproc, collation, + constval, true, + &sumcommon); /* * Now merge the results from the MCV and histogram calculations, @@ -1170,12 +1174,13 @@ pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation, */ static Selectivity prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, - Oid vartype, Oid opfamily, Const *prefixcon) + Oid vartype, Oid opfamily, + Oid collation, + Const *prefixcon) { Selectivity prefixsel; Oid cmpopr; FmgrInfo opproc; - AttStatsSlot sslot; Const *greaterstrcon; Selectivity eq_sel; @@ -1185,10 +1190,11 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, elog(ERROR, "no >= operator for opfamily %u", opfamily); fmgr_info(get_opcode(cmpopr), &opproc); - prefixsel = ineq_histogram_selectivity(root, vardata, - &opproc, true, true, - prefixcon->constvalue, - prefixcon->consttype); + prefixsel = ineq_histogram_selectivity_ext(root, vardata, + &opproc, true, true, + collation, + prefixcon->constvalue, + prefixcon->consttype); if (prefixsel < 0.0) { @@ -1196,33 +1202,24 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, return DEFAULT_MATCH_SEL; } - /*------- - * If we can create a string larger than the prefix, say - * "x < greaterstr". We try to generate the string referencing the - * collation of the var's statistics, but if that's not available, - * use DEFAULT_COLLATION_OID. - *------- + /* + * If we can create a string larger than the prefix, say "x < greaterstr". */ - if (HeapTupleIsValid(vardata->statsTuple) && - get_attstatsslot(&sslot, vardata->statsTuple, - STATISTIC_KIND_HISTOGRAM, InvalidOid, 0)) - /* sslot.stacoll is set up */ ; - else - sslot.stacoll = DEFAULT_COLLATION_OID; cmpopr = get_opfamily_member(opfamily, vartype, vartype, BTLessStrategyNumber); if (cmpopr == InvalidOid) elog(ERROR, "no < operator for opfamily %u", opfamily); fmgr_info(get_opcode(cmpopr), &opproc); - greaterstrcon = make_greater_string(prefixcon, &opproc, sslot.stacoll); + greaterstrcon = make_greater_string(prefixcon, &opproc, collation); if (greaterstrcon) { Selectivity topsel; - topsel = ineq_histogram_selectivity(root, vardata, - &opproc, false, false, - greaterstrcon->constvalue, - greaterstrcon->consttype); + topsel = ineq_histogram_selectivity_ext(root, vardata, + &opproc, false, false, + collation, + greaterstrcon->constvalue, + greaterstrcon->consttype); /* ineq_histogram_selectivity worked before, it shouldn't fail now */ Assert(topsel >= 0.0); @@ -1253,8 +1250,8 @@ prefix_selectivity(PlannerInfo *root, VariableStatData *vardata, BTEqualStrategyNumber); if (cmpopr == InvalidOid) elog(ERROR, "no = operator for opfamily %u", opfamily); - eq_sel = var_eq_const(vardata, cmpopr, prefixcon->constvalue, - false, true, false); + eq_sel = var_eq_const_ext(vardata, cmpopr, collation, prefixcon->constvalue, + false, true, false); prefixsel = Max(prefixsel, eq_sel); -- cgit v1.2.3