From 2a6368343ff43743ddd90d0f4c2d0ac03e18aa85 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Wed, 19 Sep 2018 01:54:10 +0300 Subject: Add support for nearest-neighbor (KNN) searches to SP-GiST Currently, KNN searches were supported only by GiST. SP-GiST also capable to support them. This commit implements that support. SP-GiST scan stack is replaced with queue, which serves as stack if no ordering is specified. KNN support is provided for three SP-GIST opclasses: quad_point_ops, kd_point_ops and poly_ops (catversion is bumped). Some common parts between GiST and SP-GiST KNNs are extracted into separate functions. Discussion: https://postgr.es/m/570825e8-47d0-4732-2bf6-88d67d2d51c8%40postgrespro.ru Author: Nikita Glukhov, Alexander Korotkov based on GSoC work by Vlad Sterzhanov Review: Andrey Borodin, Alexander Korotkov --- doc/src/sgml/indices.sgml | 7 ++++++ doc/src/sgml/spgist.sgml | 58 +++++++++++++++++++++++++++++++++++++++++++---- doc/src/sgml/xindex.sgml | 2 +- 3 files changed, 61 insertions(+), 6 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/indices.sgml b/doc/src/sgml/indices.sgml index a57c5e2e1f4..df7d16ff68a 100644 --- a/doc/src/sgml/indices.sgml +++ b/doc/src/sgml/indices.sgml @@ -281,6 +281,13 @@ SELECT * FROM places ORDER BY location <-> point '(101,456)' LIMIT 10; For more information see . + + Like GiST, SP-GiST supports nearest-neighbor searches. + For SP-GiST operator classes that support distance ordering, the + corresponding operator is specified in the Ordering Operators + column in . + + index diff --git a/doc/src/sgml/spgist.sgml b/doc/src/sgml/spgist.sgml index d69f034f1c5..126d1f6c156 100644 --- a/doc/src/sgml/spgist.sgml +++ b/doc/src/sgml/spgist.sgml @@ -64,12 +64,13 @@ Built-in <acronym>SP-GiST</acronym> Operator Classes - + Name Indexed Data Type Indexable Operators + Ordering Operators @@ -84,6 +85,9 @@ >^~= + + <-> + quad_point_ops @@ -96,6 +100,9 @@ >^ ~= + + <-> + range_ops @@ -111,6 +118,8 @@ >> @> + + box_ops @@ -129,6 +138,8 @@ |>> |&> + + poly_ops @@ -147,6 +158,9 @@ |>> |&> + + <-> + text_ops @@ -163,6 +177,8 @@ ~>~ ^@ + + inet_ops @@ -180,6 +196,8 @@ <= = + + @@ -191,6 +209,12 @@ supports the same operators but uses a different index data structure which may offer better performance in some applications. + + The quad_point_ops, kd_point_ops and + poly_ops operator classes support the <-> + ordering operator, which enables the k-nearest neighbor (k-NN) + search over indexed point or polygon datasets. + @@ -630,7 +654,10 @@ CREATE FUNCTION my_inner_consistent(internal, internal) RETURNS void ... typedef struct spgInnerConsistentIn { ScanKey scankeys; /* array of operators and comparison values */ - int nkeys; /* length of array */ + ScanKey orderbys; /* array of ordering operators and comparison + * values */ + int nkeys; /* length of scankeys array */ + int norderbys; /* length of orderbys array */ Datum reconstructedValue; /* value reconstructed at parent */ void *traversalValue; /* opclass-specific traverse value */ @@ -653,6 +680,7 @@ typedef struct spgInnerConsistentOut int *levelAdds; /* increment level by this much for each */ Datum *reconstructedValues; /* associated reconstructed values */ void **traversalValues; /* opclass-specific traverse values */ + double **distances; /* associated distances */ } spgInnerConsistentOut; @@ -667,6 +695,8 @@ typedef struct spgInnerConsistentOut In particular it is not necessary to check sk_flags to see if the comparison value is NULL, because the SP-GiST core code will filter out such conditions. + The array orderbys, of length norderbys, + describes ordering operators (if any) in the same manner. reconstructedValue is the value reconstructed for the parent tuple; it is (Datum) 0 at the root level or if the inner_consistent function did not provide a value at the @@ -709,6 +739,10 @@ typedef struct spgInnerConsistentOut of spgConfigOut.leafType type reconstructed for each child node to be visited; otherwise, leave reconstructedValues as NULL. + If ordered search is performed, set distances + to an array of distance values according to orderbys + array (nodes with lowest distances will be processed first). Leave it + NULL otherwise. If it is desired to pass down additional out-of-band information (traverse values) to lower levels of the tree search, set traversalValues to an array of the appropriate @@ -717,6 +751,7 @@ typedef struct spgInnerConsistentOut Note that the inner_consistent function is responsible for palloc'ing the nodeNumbers, levelAdds, + distances, reconstructedValues, and traversalValues arrays in the current memory context. However, any output traverse values pointed to by @@ -747,7 +782,10 @@ CREATE FUNCTION my_leaf_consistent(internal, internal) RETURNS bool ... typedef struct spgLeafConsistentIn { ScanKey scankeys; /* array of operators and comparison values */ - int nkeys; /* length of array */ + ScanKey orderbys; /* array of ordering operators and comparison + * values */ + int nkeys; /* length of scankeys array */ + int norderbys; /* length of orderbys array */ Datum reconstructedValue; /* value reconstructed at parent */ void *traversalValue; /* opclass-specific traverse value */ @@ -759,8 +797,10 @@ typedef struct spgLeafConsistentIn typedef struct spgLeafConsistentOut { - Datum leafValue; /* reconstructed original data, if any */ - bool recheck; /* set true if operator must be rechecked */ + Datum leafValue; /* reconstructed original data, if any */ + bool recheck; /* set true if operator must be rechecked */ + bool recheckDistances; /* set true if distances must be rechecked */ + double *distances; /* associated distances */ } spgLeafConsistentOut; @@ -775,6 +815,8 @@ typedef struct spgLeafConsistentOut In particular it is not necessary to check sk_flags to see if the comparison value is NULL, because the SP-GiST core code will filter out such conditions. + The array orderbys, of length norderbys, + describes the ordering operators in the same manner. reconstructedValue is the value reconstructed for the parent tuple; it is (Datum) 0 at the root level or if the inner_consistent function did not provide a value at the @@ -803,6 +845,12 @@ typedef struct spgLeafConsistentOut recheck may be set to true if the match is uncertain and so the operator(s) must be re-applied to the actual heap tuple to verify the match. + If ordered search is performed, set distances + to an array of distance values according to orderbys + array. Leave it NULL otherwise. If at least one of returned distances + is not exact, set recheckDistances to true. + In this case, the executor will calculate the exact distances after + fetching the tuple from the heap, and will reorder the tuples if needed. diff --git a/doc/src/sgml/xindex.sgml b/doc/src/sgml/xindex.sgml index f7713e8abaf..9446f8b836c 100644 --- a/doc/src/sgml/xindex.sgml +++ b/doc/src/sgml/xindex.sgml @@ -1242,7 +1242,7 @@ SELECT sum(x) OVER (ORDER BY x RANGE BETWEEN 5 PRECEDING AND 10 FOLLOWING) Ordering Operators - Some index access methods (currently, only GiST) support the concept of + Some index access methods (currently, only GiST and SP-GiST) support the concept of ordering operators. What we have been discussing so far are search operators. A search operator is one for which the index can be searched to find all rows satisfying -- cgit v1.2.3