summaryrefslogtreecommitdiff
path: root/src/backend/access
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access')
-rw-r--r--src/backend/access/nbtree/README7
-rw-r--r--src/backend/access/nbtree/nbtcompare.c8
-rw-r--r--src/backend/access/nbtree/nbtsearch.c68
-rw-r--r--src/backend/access/nbtree/nbtsort.c54
-rw-r--r--src/backend/access/nbtree/nbtutils.c168
5 files changed, 227 insertions, 78 deletions
diff --git a/src/backend/access/nbtree/README b/src/backend/access/nbtree/README
index 53fda66572f..34fb90e4363 100644
--- a/src/backend/access/nbtree/README
+++ b/src/backend/access/nbtree/README
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.15 2006/12/28 23:16:39 tgl Exp $
+$PostgreSQL: pgsql/src/backend/access/nbtree/README,v 1.16 2007/01/09 02:14:10 tgl Exp $
This directory contains a correct implementation of Lehman and Yao's
high-concurrency B-tree management algorithm (P. Lehman and S. Yao,
@@ -483,5 +483,6 @@ Notes to operator class implementors
With this implementation, we require each supported combination of
datatypes to supply us with a comparison procedure via pg_amproc.
This procedure must take two nonnull values A and B and return an int32 < 0,
-0, or > 0 if A < B, A = B, or A > B, respectively. See nbtcompare.c for
-examples.
+0, or > 0 if A < B, A = B, or A > B, respectively. The procedure must
+not return INT_MIN for "A < B", since the value may be negated before
+being tested for sign. See nbtcompare.c for examples.
diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c
index 27caa8495cf..97489c60368 100644
--- a/src/backend/access/nbtree/nbtcompare.c
+++ b/src/backend/access/nbtree/nbtcompare.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.53 2007/01/05 22:19:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtcompare.c,v 1.54 2007/01/09 02:14:10 tgl Exp $
*
* NOTES
*
@@ -22,10 +22,10 @@
*
* The result is always an int32 regardless of the input datatype.
*
- * NOTE: although any negative int32 is acceptable for reporting "<",
- * and any positive int32 is acceptable for reporting ">", routines
+ * Although any negative int32 (except INT_MIN) is acceptable for reporting
+ * "<", and any positive int32 is acceptable for reporting ">", routines
* that work on 32-bit or wider datatypes can't just return "a - b".
- * That could overflow and give the wrong answer. Also, one should not
+ * That could overflow and give the wrong answer. Also, one must not
* return INT_MIN to report "<", since some callers will negate the result.
*
* NOTE: it is critical that the comparison function impose a total order
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 2da75f547c7..fc8b18a2e90 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.110 2007/01/05 22:19:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.111 2007/01/09 02:14:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -376,12 +376,17 @@ _bt_compare(Relation rel,
{
if (isNull)
result = 0; /* NULL "=" NULL */
+ else if (scankey->sk_flags & SK_BT_NULLS_FIRST)
+ result = -1; /* NULL "<" NOT_NULL */
else
result = 1; /* NULL ">" NOT_NULL */
}
else if (isNull) /* key is NOT_NULL and item is NULL */
{
- result = -1; /* NOT_NULL "<" NULL */
+ if (scankey->sk_flags & SK_BT_NULLS_FIRST)
+ result = 1; /* NOT_NULL ">" NULL */
+ else
+ result = -1; /* NOT_NULL "<" NULL */
}
else
{
@@ -390,16 +395,15 @@ _bt_compare(Relation rel,
* the sk_argument as right arg (they might be of different
* types). Since it is convenient for callers to think of
* _bt_compare as comparing the scankey to the index item, we have
- * to flip the sign of the comparison result.
- *
- * Note: curious-looking coding is to avoid overflow if comparison
- * function returns INT_MIN. There is no risk of overflow for
- * positive results.
+ * to flip the sign of the comparison result. (Unless it's a DESC
+ * column, in which case we *don't* flip the sign.)
*/
result = DatumGetInt32(FunctionCall2(&scankey->sk_func,
datum,
scankey->sk_argument));
- result = (result < 0) ? 1 : -result;
+
+ if (!(scankey->sk_flags & SK_BT_DESC))
+ result = -result;
}
/* if the keys are unequal, return the difference */
@@ -617,11 +621,12 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* in the first row member makes the condition unmatchable, just
* like qual_ok = false.
*/
- cur = (ScanKey) DatumGetPointer(cur->sk_argument);
- Assert(cur->sk_flags & SK_ROW_MEMBER);
- if (cur->sk_flags & SK_ISNULL)
+ ScanKey subkey = (ScanKey) DatumGetPointer(cur->sk_argument);
+
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ if (subkey->sk_flags & SK_ISNULL)
return false;
- memcpy(scankeys + i, cur, sizeof(ScanKeyData));
+ memcpy(scankeys + i, subkey, sizeof(ScanKeyData));
/*
* If the row comparison is the last positioning key we accepted,
@@ -632,21 +637,46 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* even if the row comparison is of ">" or "<" type, because the
* condition applied to all but the last row member is effectively
* ">=" or "<=", and so the extra keys don't break the positioning
- * scheme.
+ * scheme. But, by the same token, if we aren't able to use all
+ * the row members, then the part of the row comparison that we
+ * did use has to be treated as just a ">=" or "<=" condition,
+ * and so we'd better adjust strat_total accordingly.
*/
if (i == keysCount - 1)
{
- while (!(cur->sk_flags & SK_ROW_END))
+ bool used_all_subkeys = false;
+
+ Assert(!(subkey->sk_flags & SK_ROW_END));
+ for(;;)
{
- cur++;
- Assert(cur->sk_flags & SK_ROW_MEMBER);
- if (cur->sk_attno != keysCount + 1)
+ subkey++;
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ if (subkey->sk_attno != keysCount + 1)
break; /* out-of-sequence, can't use it */
- if (cur->sk_flags & SK_ISNULL)
+ if (subkey->sk_strategy != cur->sk_strategy)
+ break; /* wrong direction, can't use it */
+ if (subkey->sk_flags & SK_ISNULL)
break; /* can't use null keys */
Assert(keysCount < INDEX_MAX_KEYS);
- memcpy(scankeys + keysCount, cur, sizeof(ScanKeyData));
+ memcpy(scankeys + keysCount, subkey, sizeof(ScanKeyData));
keysCount++;
+ if (subkey->sk_flags & SK_ROW_END)
+ {
+ used_all_subkeys = true;
+ break;
+ }
+ }
+ if (!used_all_subkeys)
+ {
+ switch (strat_total)
+ {
+ case BTLessStrategyNumber:
+ strat_total = BTLessEqualStrategyNumber;
+ break;
+ case BTGreaterStrategyNumber:
+ strat_total = BTGreaterEqualStrategyNumber;
+ break;
+ }
}
break; /* done with outer loop */
}
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 945f20729c0..a917e27a765 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -57,7 +57,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.109 2007/01/05 22:19:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.110 2007/01/09 02:14:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -728,39 +728,45 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
ScanKey entry;
Datum attrDatum1,
attrDatum2;
- bool isFirstNull,
- isSecondNull;
+ bool isNull1,
+ isNull2;
+ int32 compare;
entry = indexScanKey + i - 1;
- attrDatum1 = index_getattr(itup, i, tupdes,
- &isFirstNull);
- attrDatum2 = index_getattr(itup2, i, tupdes,
- &isSecondNull);
- if (isFirstNull)
+ attrDatum1 = index_getattr(itup, i, tupdes, &isNull1);
+ attrDatum2 = index_getattr(itup2, i, tupdes, &isNull2);
+ if (isNull1)
{
- if (!isSecondNull)
- {
- load1 = false;
- break;
- }
+ if (isNull2)
+ compare = 0; /* NULL "=" NULL */
+ else if (entry->sk_flags & SK_BT_NULLS_FIRST)
+ compare = -1; /* NULL "<" NOT_NULL */
+ else
+ compare = 1; /* NULL ">" NOT_NULL */
+ }
+ else if (isNull2)
+ {
+ if (entry->sk_flags & SK_BT_NULLS_FIRST)
+ compare = 1; /* NOT_NULL ">" NULL */
+ else
+ compare = -1; /* NOT_NULL "<" NULL */
}
- else if (isSecondNull)
- break;
else
{
- int32 compare;
-
compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
attrDatum1,
attrDatum2));
- if (compare > 0)
- {
- load1 = false;
- break;
- }
- else if (compare < 0)
- break;
+
+ if (entry->sk_flags & SK_BT_DESC)
+ compare = -compare;
}
+ if (compare > 0)
+ {
+ load1 = false;
+ break;
+ }
+ else if (compare < 0)
+ break;
}
}
else
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index d82a93a63c5..d453a93cafa 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.81 2007/01/05 22:19:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtutils.c,v 1.82 2007/01/09 02:14:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -30,6 +30,7 @@
static bool _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
ScanKey leftarg, ScanKey rightarg,
bool *result);
+static void _bt_mark_scankey_with_indoption(ScanKey skey, int16 *indoption);
static void _bt_mark_scankey_required(ScanKey skey);
static bool _bt_check_rowcompare(ScanKey skey,
IndexTuple tuple, TupleDesc tupdesc,
@@ -49,10 +50,12 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
ScanKey skey;
TupleDesc itupdesc;
int natts;
+ int16 *indoption;
int i;
itupdesc = RelationGetDescr(rel);
natts = RelationGetNumberOfAttributes(rel);
+ indoption = rel->rd_indoption;
skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
@@ -61,6 +64,7 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
FmgrInfo *procinfo;
Datum arg;
bool null;
+ int flags;
/*
* We can use the cached (default) support procs since no cross-type
@@ -68,8 +72,9 @@ _bt_mkscankey(Relation rel, IndexTuple itup)
*/
procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
arg = index_getattr(itup, i + 1, itupdesc, &null);
+ flags = (null ? SK_ISNULL : 0) | (indoption[i] << SK_BT_INDOPTION_SHIFT);
ScanKeyEntryInitializeWithInfo(&skey[i],
- null ? SK_ISNULL : 0,
+ flags,
(AttrNumber) (i + 1),
InvalidStrategy,
InvalidOid,
@@ -96,23 +101,27 @@ _bt_mkscankey_nodata(Relation rel)
{
ScanKey skey;
int natts;
+ int16 *indoption;
int i;
natts = RelationGetNumberOfAttributes(rel);
+ indoption = rel->rd_indoption;
skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
for (i = 0; i < natts; i++)
{
FmgrInfo *procinfo;
+ int flags;
/*
* We can use the cached (default) support procs since no cross-type
* comparison can be needed.
*/
procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
+ flags = SK_ISNULL | (indoption[i] << SK_BT_INDOPTION_SHIFT);
ScanKeyEntryInitializeWithInfo(&skey[i],
- SK_ISNULL,
+ flags,
(AttrNumber) (i + 1),
InvalidStrategy,
InvalidOid,
@@ -157,7 +166,13 @@ _bt_freestack(BTStack stack)
* the number of input keys, so->numberOfKeys gets the number of output
* keys (possibly less, never greater).
*
- * The primary purpose of this routine is to discover how many scan keys
+ * The output keys are marked with additional sk_flag bits beyond the
+ * system-standard bits supplied by the caller. The DESC and NULLS_FIRST
+ * indoption bits for the relevant index attribute are copied into the flags.
+ * Also, for a DESC column, we commute (flip) all the sk_strategy numbers
+ * so that the index sorts in the desired direction.
+ *
+ * One key purpose of this routine is to discover how many scan keys
* must be satisfied to continue the scan. It also attempts to eliminate
* redundant keys and detect contradictory keys. (If the index opfamily
* provides incomplete sets of cross-type operators, we may fail to detect
@@ -219,6 +234,7 @@ _bt_preprocess_keys(IndexScanDesc scan)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
int numberOfKeys = scan->numberOfKeys;
+ int16 *indoption = scan->indexRelation->rd_indoption;
int new_numberOfKeys;
int numberOfEqualCols;
ScanKey inkeys;
@@ -254,7 +270,8 @@ _bt_preprocess_keys(IndexScanDesc scan)
*/
if (cur->sk_flags & SK_ISNULL)
so->qual_ok = false;
- memcpy(outkeys, inkeys, sizeof(ScanKeyData));
+ _bt_mark_scankey_with_indoption(cur, indoption);
+ memcpy(outkeys, cur, sizeof(ScanKeyData));
so->numberOfKeys = 1;
/* We can mark the qual as required if it's for first index col */
if (cur->sk_attno == 1)
@@ -407,6 +424,9 @@ _bt_preprocess_keys(IndexScanDesc scan)
memset(xform, 0, sizeof(xform));
}
+ /* apply indoption to scankey (might change sk_strategy!) */
+ _bt_mark_scankey_with_indoption(cur, indoption);
+
/* check strategy this key's operator corresponds to */
j = cur->sk_strategy - 1;
@@ -486,6 +506,11 @@ _bt_preprocess_keys(IndexScanDesc scan)
* Note: op always points at the same ScanKey as either leftarg or rightarg.
* Since we don't scribble on the scankeys, this aliasing should cause no
* trouble.
+ *
+ * Note: this routine needs to be insensitive to any DESC option applied
+ * to the index column. For example, "x < 4" is a tighter constraint than
+ * "x < 5" regardless of which way the index is sorted. We don't worry about
+ * NULLS FIRST/LAST either, since the given values are never nulls.
*/
static bool
_bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
@@ -498,6 +523,7 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
optype,
opcintype,
cmp_op;
+ StrategyNumber strat;
/*
* The opfamily we need to worry about is identified by the index column.
@@ -538,11 +564,18 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
* operator. (This cannot result in infinite recursion, since no
* indexscan initiated by syscache lookup will use cross-data-type
* operators.)
+ *
+ * If the sk_strategy was flipped by _bt_mark_scankey_with_indoption,
+ * we have to un-flip it to get the correct opfamily member.
*/
+ strat = op->sk_strategy;
+ if (op->sk_flags & SK_BT_DESC)
+ strat = BTCommuteStrategyNumber(strat);
+
cmp_op = get_opfamily_member(rel->rd_opfamily[leftarg->sk_attno - 1],
lefttype,
righttype,
- op->sk_strategy);
+ strat);
if (OidIsValid(cmp_op))
{
RegProcedure cmp_proc = get_opcode(cmp_op);
@@ -562,13 +595,56 @@ _bt_compare_scankey_args(IndexScanDesc scan, ScanKey op,
}
/*
+ * Mark a scankey with info from the index's indoption array.
+ *
+ * We copy the appropriate indoption value into the scankey sk_flags
+ * (shifting to avoid clobbering system-defined flag bits). Also, if
+ * the DESC option is set, commute (flip) the operator strategy number.
+ *
+ * This function is applied to the *input* scankey structure; therefore
+ * on a rescan we will be looking at already-processed scankeys. Hence
+ * we have to be careful not to re-commute the strategy if we already did it.
+ * It's a bit ugly to modify the caller's copy of the scankey but in practice
+ * there shouldn't be any problem, since the index's indoptions are certainly
+ * not going to change while the scankey survives.
+ */
+static void
+_bt_mark_scankey_with_indoption(ScanKey skey, int16 *indoption)
+{
+ int addflags;
+
+ addflags = indoption[skey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
+ if ((addflags & SK_BT_DESC) && !(skey->sk_flags & SK_BT_DESC))
+ skey->sk_strategy = BTCommuteStrategyNumber(skey->sk_strategy);
+ skey->sk_flags |= addflags;
+
+ if (skey->sk_flags & SK_ROW_HEADER)
+ {
+ ScanKey subkey = (ScanKey) DatumGetPointer(skey->sk_argument);
+
+ for (;;)
+ {
+ Assert(subkey->sk_flags & SK_ROW_MEMBER);
+ addflags = indoption[subkey->sk_attno - 1] << SK_BT_INDOPTION_SHIFT;
+ if ((addflags & SK_BT_DESC) && !(subkey->sk_flags & SK_BT_DESC))
+ subkey->sk_strategy = BTCommuteStrategyNumber(subkey->sk_strategy);
+ subkey->sk_flags |= addflags;
+ if (subkey->sk_flags & SK_ROW_END)
+ break;
+ subkey++;
+ }
+ }
+}
+
+/*
* Mark a scankey as "required to continue the scan".
*
* Depending on the operator type, the key may be required for both scan
* directions or just one. Also, if the key is a row comparison header,
* we have to mark the appropriate subsidiary ScanKeys as required. In
* such cases, the first subsidiary key is required, but subsequent ones
- * are required only as long as they correspond to successive index columns.
+ * are required only as long as they correspond to successive index columns
+ * and match the leading column as to sort direction.
* Otherwise the row comparison ordering is different from the index ordering
* and so we can't stop the scan on the basis of those lower-order columns.
*
@@ -616,9 +692,10 @@ _bt_mark_scankey_required(ScanKey skey)
for (;;)
{
Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(subkey->sk_strategy == skey->sk_strategy);
if (subkey->sk_attno != attno)
break; /* non-adjacent key, so not required */
+ if (subkey->sk_strategy != skey->sk_strategy)
+ break; /* wrong direction, so not required */
subkey->sk_flags |= addflags;
if (subkey->sk_flags & SK_ROW_END)
break;
@@ -731,15 +808,32 @@ _bt_checkkeys(IndexScanDesc scan,
if (isNull)
{
- /*
- * Since NULLs are sorted after non-NULLs, we know we have reached
- * the upper limit of the range of values for this index attr. On
- * a forward scan, we can stop if this qual is one of the "must
- * match" subset. On a backward scan, however, we should keep
- * going.
- */
- if ((key->sk_flags & SK_BT_REQFWD) && ScanDirectionIsForward(dir))
- *continuescan = false;
+ if (key->sk_flags & SK_BT_NULLS_FIRST)
+ {
+ /*
+ * Since NULLs are sorted before non-NULLs, we know we have
+ * reached the lower limit of the range of values for this
+ * index attr. On a backward scan, we can stop if this qual is
+ * one of the "must match" subset. On a forward scan,
+ * however, we should keep going.
+ */
+ if ((key->sk_flags & SK_BT_REQBKWD) &&
+ ScanDirectionIsBackward(dir))
+ *continuescan = false;
+ }
+ else
+ {
+ /*
+ * Since NULLs are sorted after non-NULLs, we know we have
+ * reached the upper limit of the range of values for this
+ * index attr. On a forward scan, we can stop if this qual is
+ * one of the "must match" subset. On a backward scan,
+ * however, we should keep going.
+ */
+ if ((key->sk_flags & SK_BT_REQFWD) &&
+ ScanDirectionIsForward(dir))
+ *continuescan = false;
+ }
/*
* In any case, this indextuple doesn't match the qual.
@@ -809,7 +903,6 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
bool isNull;
Assert(subkey->sk_flags & SK_ROW_MEMBER);
- Assert(subkey->sk_strategy == skey->sk_strategy);
datum = index_getattr(tuple,
subkey->sk_attno,
@@ -818,16 +911,32 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
if (isNull)
{
- /*
- * Since NULLs are sorted after non-NULLs, we know we have reached
- * the upper limit of the range of values for this index attr. On
- * a forward scan, we can stop if this qual is one of the "must
- * match" subset. On a backward scan, however, we should keep
- * going.
- */
- if ((subkey->sk_flags & SK_BT_REQFWD) &&
- ScanDirectionIsForward(dir))
- *continuescan = false;
+ if (subkey->sk_flags & SK_BT_NULLS_FIRST)
+ {
+ /*
+ * Since NULLs are sorted before non-NULLs, we know we have
+ * reached the lower limit of the range of values for this
+ * index attr. On a backward scan, we can stop if this qual is
+ * one of the "must match" subset. On a forward scan,
+ * however, we should keep going.
+ */
+ if ((subkey->sk_flags & SK_BT_REQBKWD) &&
+ ScanDirectionIsBackward(dir))
+ *continuescan = false;
+ }
+ else
+ {
+ /*
+ * Since NULLs are sorted after non-NULLs, we know we have
+ * reached the upper limit of the range of values for this
+ * index attr. On a forward scan, we can stop if this qual is
+ * one of the "must match" subset. On a backward scan,
+ * however, we should keep going.
+ */
+ if ((subkey->sk_flags & SK_BT_REQFWD) &&
+ ScanDirectionIsForward(dir))
+ *continuescan = false;
+ }
/*
* In any case, this indextuple doesn't match the qual.
@@ -859,6 +968,9 @@ _bt_check_rowcompare(ScanKey skey, IndexTuple tuple, TupleDesc tupdesc,
datum,
subkey->sk_argument));
+ if (subkey->sk_flags & SK_BT_DESC)
+ cmpresult = -cmpresult;
+
/* Done comparing if unequal, else advance to next column */
if (cmpresult != 0)
break;