intarray
 
 
  intarray
 
 
  This is an implementation of RD-tree data structure using GiST interface
  of PostgreSQL. It has built-in lossy compression.
 
 
  Current implementation provides index support for one-dimensional array of
  int4's - gist__int_ops, suitable for small and medium size of arrays (used on
  default), and gist__intbig_ops for indexing large arrays (we use superimposed
  signature with length of 4096 bits to represent sets).
 
 
  Functions
  
   
    
     int   icount(int[]) - the number of elements in intarray
    
    
test=# select icount('{1,2,3}'::int[]);  
 icount
--------
      3
(1 row)
    
   
   
    
     int[] sort(int[], 'asc' | 'desc') - sort intarray
    
    
test=# select sort('{1,2,3}'::int[],'desc');
  sort  
---------
 {3,2,1}
(1 row)
    
   
   
    
     int[] sort(int[]) - sort in ascending order
    
   
   
    
     int[] sort_asc(int[]),sort_desc(int[]) - shortcuts for sort
    
   
   
    
     int[] uniq(int[]) - returns unique elements
    
    
test=# select uniq(sort('{1,2,3,2,1}'::int[]));
  uniq  
---------
 {1,2,3}
(1 row)
    
   
   
    
     int   idx(int[], int item) - returns index of first
      intarray matching element to item, or '0' if matching failed.
    
    
test=# select idx('{1,2,3,2,1}'::int[],2);
 idx
-----
   2
(1 row)
    
   
   
    
     int[] subarray(int[],int START [, int LEN]) - returns
    part of intarray starting from element number START (from 1) and length LEN.
    
    
test=# select subarray('{1,2,3,2,1}'::int[],2,3);
 subarray
----------
 {2,3,2}
(1 row)
    
   
   
    
     int[] intset(int4) - casting int4 to int[]
    
    
test=# select intset(1);
 intset
--------
 {1}
(1 row)
    
   
  
 
 
  Operations
  
   Operations
   
    
     
      Operator
      Description
     
    
    
     
      int[] && int[]
      overlap - returns TRUE if arrays have at least one common element
     
     
      int[] @> int[]
      contains - returns TRUE if left array contains right array
     
     
      int[] <@ int[]
      contained - returns TRUE if left array is contained in right array
     
     
      # int[]
      returns the number of elements in array
     
     
      int[] + int
      push element to array ( add to end of array)
     
     
      int[] + int[]  
      merge of arrays (right array added to the end of left one)
     
     
      int[] - int
      remove entries matched by right argument from array
     
     
      int[] - int[]
      remove right array from left
     
     
      int[] | int
      returns intarray - union of arguments
     
     
      int[] | int[]
      returns intarray as a union of two arrays
     
     
      int[] & int[]
      returns intersection of arrays
     
     
      int[] @@ query_int
      
       returns TRUE if array satisfies query (like
       '1&(2|3)')
      
     
     
      query_int ~~ int[]
      returns TRUE if array satisfies query (commutator of @@)
     
    
   
  
  
   (Before PostgreSQL 8.2, the containment operators @> and <@ were
   respectively called @ and ~.  These names are still available, but are
   deprecated and will eventually be retired.  Notice that the old names
   are reversed from the convention formerly followed by the core geometric
   datatypes!)
  
 
 
  Example
  
CREATE TABLE message (mid INT NOT NULL,sections INT[]);
CREATE TABLE message_section_map (mid INT NOT NULL,sid INT NOT NULL);
-- create indices
CREATE unique index message_key ON message ( mid );
CREATE unique index message_section_map_key2 ON message_section_map (sid, mid );
CREATE INDEX message_rdtree_idx ON message USING GIST ( sections gist__int_ops);
-- select some messages with section in 1 OR 2 - OVERLAP operator
SELECT message.mid FROM message WHERE message.sections && '{1,2}'; 
-- select messages contains in sections 1 AND 2 - CONTAINS operator
SELECT message.mid FROM message WHERE message.sections @> '{1,2}';
-- the same, CONTAINED operator
SELECT message.mid FROM message WHERE '{1,2}' <@ message.sections;
  
 
 
  Benchmark
  
  subdirectory bench contains benchmark suite.
  
  
  cd ./bench
  1. createdb TEST
  2. psql TEST < ../_int.sql
  3. ./create_test.pl | psql TEST
  4. ./bench.pl - perl script to benchmark queries, supports OR, AND queries
                  with/without RD-Tree. Run script without arguments to
                  see availbale options.
     a)test without RD-Tree (OR)
       ./bench.pl -d TEST -c -s 1,2 -v
     b)test with RD-Tree
       ./bench.pl -d TEST -c -s 1,2 -v -r
     BENCHMARKS:
    
     Size of table <message>: 200000
     Size of table <message_section_map>: 269133
    
     Distribution of messages by sections:
    
     section 0: 74377 messages
     section 1: 16284 messages
     section 50: 1229 messages
     section 99: 683 messages
    
     old - without RD-Tree support,
     new - with RD-Tree
    
     +----------+---------------+----------------+
     |Search set|OR, time in sec|AND, time in sec|
     |          +-------+-------+--------+-------+
     |          |  old  |  new  |   old  |  new  |
     +----------+-------+-------+--------+-------+
     |         1|  0.625|  0.101|       -|      -|
     +----------+-------+-------+--------+-------+
     |        99|  0.018|  0.017|       -|      -|
     +----------+-------+-------+--------+-------+
     |       1,2|  0.766|  0.133|   0.628|  0.045|
     +----------+-------+-------+--------+-------+
     | 1,2,50,65|  0.794|  0.141|   0.030|  0.006|
     +----------+-------+-------+--------+-------+
  
 
 
  Authors
  
   All work was done by Teodor Sigaev (teodor@stack.net) and Oleg
   Bartunov (oleg@sai.msu.su). See
    for
   additional information. Andrey Oktyabrski did a great work on adding new
   functions and operations.