diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2013-07-24 00:44:09 -0400 | 
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2013-07-24 00:44:09 -0400 | 
| commit | b32a25c3d5292c800c0468097b9a63e931a0af0f (patch) | |
| tree | 729dcaa301880321f155b8a054216138f4cbc45e /src/backend/utils/adt | |
| parent | 10a509d82956dee14eb2011bd266cd3c728ae188 (diff) | |
Fix booltestsel() for case where we have NULL stats but not MCV stats.
In a boolean column that contains mostly nulls, ANALYZE might not find
enough non-null values to populate the most-common-values stats,
but it would still create a pg_statistic entry with stanullfrac set.
The logic in booltestsel() for this situation did the wrong thing for
"col IS NOT TRUE" and "col IS NOT FALSE" tests, forgetting that null
values would satisfy these tests (so that the true selectivity would
be close to one, not close to zero).  Per bug #8274.
Fix by Andrew Gierth, some comment-smithing by me.
Diffstat (limited to 'src/backend/utils/adt')
| -rw-r--r-- | src/backend/utils/adt/selfuncs.c | 22 | 
1 file changed, 10 insertions, 12 deletions
```diff
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index da66f347078..d8c1a889edc 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1529,31 +1529,29 @@ booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
 			/*
 			 * No most-common-value info available. Still have null fraction
 			 * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
-			 * for null fraction and assume an even split for boolean tests.
+			 * for null fraction and assume a 50-50 split of TRUE and FALSE.
 			 */
 			switch (booltesttype)
 			{
 				case IS_UNKNOWN:
-
-					/*
-					 * Use freq_null directly.
-					 */
+					/* select only NULL values */
 					selec = freq_null;
 					break;
 				case IS_NOT_UNKNOWN:
-
-					/*
-					 * Select not unknown (not null) values. Calculate from
-					 * freq_null.
-					 */
+					/* select non-NULL values */
 					selec = 1.0 - freq_null;
 					break;
 				case IS_TRUE:
-				case IS_NOT_TRUE:
 				case IS_FALSE:
-				case IS_NOT_FALSE:
+					/* Assume we select half of the non-NULL values */
 					selec = (1.0 - freq_null) / 2.0;
 					break;
+				case IS_NOT_TRUE:
+				case IS_NOT_FALSE:
+					/* Assume we select NULLs plus half of the non-NULLs */
+					/* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
+					selec = (freq_null + 1.0) / 2.0;
+					break;
 				default:
 					elog(ERROR, "unrecognized booltesttype: %d",
 						 (int) booltesttype);
```
