diff options
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/utils/adt/selfuncs.c | 49 | 
1 file changed, 32 insertions, 17 deletions
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 5c7e8325a19..bbc344f16bd 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -2329,7 +2329,9 @@ eqjoinsel_semi(Oid operator,
 		bool	   *hasmatch1;
 		bool	   *hasmatch2;
 		double		nullfrac1 = stats1->stanullfrac;
-		double		matchfreq1;
+		double		matchfreq1,
+					uncertainfrac,
+					uncertain;
 		int			i,
 					nmatches;
 
@@ -2382,18 +2384,26 @@ eqjoinsel_semi(Oid operator,
 		 * the uncertain rows that a fraction nd2/nd1 have join partners. We
 		 * can discount the known-matched MCVs from the distinct-values counts
 		 * before doing the division.
+		 *
+		 * Crude as the above is, it's completely useless if we don't have
+		 * reliable ndistinct values for both sides.  Hence, if either nd1
+		 * or nd2 is default, punt and assume half of the uncertain rows
+		 * have join partners.
 		 */
-		nd1 -= nmatches;
-		nd2 -= nmatches;
-		if (nd1 <= nd2 || nd2 <= 0)
-			selec = Max(matchfreq1, 1.0 - nullfrac1);
-		else
+		if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
 		{
-			double		uncertain = 1.0 - matchfreq1 - nullfrac1;
-
-			CLAMP_PROBABILITY(uncertain);
-			selec = matchfreq1 + (nd2 / nd1) * uncertain;
+			nd1 -= nmatches;
+			nd2 -= nmatches;
+			if (nd1 <= nd2 || nd2 <= 0)
+				uncertainfrac = 1.0;
+			else
+				uncertainfrac = nd2 / nd1;
 		}
+		else
+			uncertainfrac = 0.5;
+		uncertain = 1.0 - matchfreq1 - nullfrac1;
+		CLAMP_PROBABILITY(uncertain);
+		selec = matchfreq1 + uncertainfrac * uncertain;
 	}
 	else
 	{
@@ -2403,15 +2413,20 @@ eqjoinsel_semi(Oid operator,
 		 */
 		double		nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
 
-		if (vardata1->rel)
-			nd1 = Min(nd1, vardata1->rel->rows);
-		if (vardata2->rel)
-			nd2 = Min(nd2, vardata2->rel->rows);
+		if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
+		{
+			if (vardata1->rel)
+				nd1 = Min(nd1, vardata1->rel->rows);
+			if (vardata2->rel)
+				nd2 = Min(nd2, vardata2->rel->rows);
 
-		if (nd1 <= nd2 || nd2 <= 0)
-			selec = 1.0 - nullfrac1;
+			if (nd1 <= nd2 || nd2 <= 0)
+				selec = 1.0 - nullfrac1;
+			else
+				selec = (nd2 / nd1) * (1.0 - nullfrac1);
+		}
 		else
-			selec = (nd2 / nd1) * (1.0 - nullfrac1);
+			selec = 0.5 * (1.0 - nullfrac1);
 	}
 
 	if (have_mcvs1)
