summaryrefslogtreecommitdiff
path: root/doc/src
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src')
-rw-r--r--doc/src/sgml/catalogs.sgml120
-rw-r--r--doc/src/sgml/datatype.sgml11
-rw-r--r--doc/src/sgml/postgres.sgml2
-rw-r--r--doc/src/sgml/ref/select.sgml126
-rw-r--r--doc/src/sgml/tablesample-method.sgml322
-rw-r--r--doc/src/sgml/tsm-system-rows.sgml39
-rw-r--r--doc/src/sgml/tsm-system-time.sgml42
7 files changed, 374 insertions, 288 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 2c2190f13d3..9096ee5d517 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -279,11 +279,6 @@
</row>
<row>
- <entry><link linkend="catalog-pg-tablesample-method"><structname>pg_tablesample_method</structname></link></entry>
- <entry>table sampling methods</entry>
- </row>
-
- <row>
<entry><link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link></entry>
<entry>tablespaces within this database cluster</entry>
</row>
@@ -6132,121 +6127,6 @@
</sect1>
- <sect1 id="catalog-pg-tablesample-method">
- <title><structname>pg_tabesample_method</structname></title>
-
- <indexterm zone="catalog-pg-tablesample-method">
- <primary>pg_am</primary>
- </indexterm>
-
- <para>
- The catalog <structname>pg_tablesample_method</structname> stores
- information about table sampling methods which can be used in
- <command>TABLESAMPLE</command> clause of a <command>SELECT</command>
- statement.
- </para>
-
- <table>
- <title><structname>pg_tablesample_method</> Columns</title>
-
- <tgroup cols="4">
- <thead>
- <row>
- <entry>Name</entry>
- <entry>Type</entry>
- <entry>References</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
-
- <row>
- <entry><structfield>oid</structfield></entry>
- <entry><type>oid</type></entry>
- <entry></entry>
- <entry>Row identifier (hidden attribute; must be explicitly selected)</entry>
- </row>
-
- <row>
- <entry><structfield>tsmname</structfield></entry>
- <entry><type>name</type></entry>
- <entry></entry>
- <entry>Name of the sampling method</entry>
- </row>
-
- <row>
- <entry><structfield>tsmseqscan</structfield></entry>
- <entry><type>bool</type></entry>
- <entry></entry>
- <entry>If true, the sampling method scans the whole table sequentially.
- </entry>
- </row>
-
- <row>
- <entry><structfield>tsmpagemode</structfield></entry>
- <entry><type>bool</type></entry>
- <entry></entry>
- <entry>If true, the sampling method always reads the pages completely.
- </entry>
- </row>
-
- <row>
- <entry><structfield>tsminit</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Initialize the sampling scan</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmnextblock</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Get next block number</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmnexttuple</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Get next tuple offset</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmexaminetuple</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry>Function which examines the tuple contents and decides if to
- return it, or zero if none</entry>
- </row>
-
- <row>
- <entry><structfield>tsmend</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>End the sampling scan</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmreset</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry><quote>Restart the state of sampling scan</quote> function</entry>
- </row>
-
- <row>
- <entry><structfield>tsmcost</structfield></entry>
- <entry><type>regproc</type></entry>
- <entry><literal><link linkend="catalog-pg-proc"><structname>pg_proc</structname></link>.oid</literal></entry>
- <entry>Costing function</entry>
- </row>
-
- </tbody>
- </tgroup>
- </table>
-
- </sect1>
-
-
<sect1 id="catalog-pg-tablespace">
<title><structname>pg_tablespace</structname></title>
diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 8e13555a3aa..8113ddf8179 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4346,7 +4346,7 @@ SET xmloption TO { DOCUMENT | CONTENT };
an object identifier. There are also several alias types for
<type>oid</>: <type>regproc</>, <type>regprocedure</>,
<type>regoper</>, <type>regoperator</>, <type>regclass</>,
- <type>regtype</>, <type>regrole</>, <type>regnamespace</>,
+ <type>regtype</>, <type>regrole</>, <type>regnamespace</>,
<type>regconfig</>, and <type>regdictionary</>.
<xref linkend="datatype-oid-table"> shows an overview.
</para>
@@ -4623,6 +4623,10 @@ SELECT * FROM pg_attribute
</indexterm>
<indexterm zone="datatype-pseudo">
+ <primary>tsm_handler</primary>
+ </indexterm>
+
+ <indexterm zone="datatype-pseudo">
<primary>cstring</primary>
</indexterm>
@@ -4717,6 +4721,11 @@ SELECT * FROM pg_attribute
</row>
<row>
+ <entry><type>tsm_handler</></entry>
+ <entry>A tablesample method handler is declared to return <type>tsm_handler</>.</entry>
+ </row>
+
+ <row>
<entry><type>record</></entry>
<entry>Identifies a function returning an unspecified row type.</entry>
</row>
diff --git a/doc/src/sgml/postgres.sgml b/doc/src/sgml/postgres.sgml
index d1703e9c01f..7e82cdc3b12 100644
--- a/doc/src/sgml/postgres.sgml
+++ b/doc/src/sgml/postgres.sgml
@@ -243,6 +243,7 @@
&nls;
&plhandler;
&fdwhandler;
+ &tablesample-method;
&custom-scan;
&geqo;
&indexam;
@@ -250,7 +251,6 @@
&spgist;
&gin;
&brin;
- &tablesample-method;
&storage;
&bki;
&planstats;
diff --git a/doc/src/sgml/ref/select.sgml b/doc/src/sgml/ref/select.sgml
index 632d7935cb4..44810f4909c 100644
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
@@ -49,7 +49,8 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="parameter">expression</replac
<phrase>where <replaceable class="parameter">from_item</replaceable> can be one of:</phrase>
- [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ] [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
+ [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ] [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
+ [ TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ] ]
[ LATERAL ] ( <replaceable class="parameter">select</replaceable> ) [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ]
<replaceable class="parameter">with_query_name</replaceable> [ [ AS ] <replaceable class="parameter">alias</replaceable> [ ( <replaceable class="parameter">column_alias</replaceable> [, ...] ) ] ]
[ LATERAL ] <replaceable class="parameter">function_name</replaceable> ( [ <replaceable class="parameter">argument</replaceable> [, ...] ] )
@@ -326,50 +327,6 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
</varlistentry>
<varlistentry>
- <term>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</term>
- <listitem>
- <para>
- Table sample clause after
- <replaceable class="parameter">table_name</replaceable> indicates that
- a <replaceable class="parameter">sampling_method</replaceable> should
- be used to retrieve subset of rows in the table.
- The <replaceable class="parameter">sampling_method</replaceable> can be
- any sampling method installed in the database. There are currently two
- sampling methods available in the standard
- <productname>PostgreSQL</productname> distribution:
- <itemizedlist>
- <listitem>
- <para><literal>SYSTEM</literal></para>
- </listitem>
- <listitem>
- <para><literal>BERNOULLI</literal></para>
- </listitem>
- </itemizedlist>
- Both of these sampling methods currently accept only single argument
- which is the percent (floating point from 0 to 100) of the rows to
- be returned.
- The <literal>SYSTEM</literal> sampling method does block level
- sampling with each block having the same chance of being selected and
- returns all rows from each selected block.
- The <literal>BERNOULLI</literal> scans whole table and returns
- individual rows with equal probability. Additional sampling methods
- may be installed in the database via extensions.
- </para>
- <para>
- The optional parameter <literal>REPEATABLE</literal> uses the seed
- parameter, which can be a number or expression producing a number, as
- a random seed for sampling. Note that subsequent commands may return
- different results even if same <literal>REPEATABLE</literal> clause was
- specified. This happens because <acronym>DML</acronym> statements and
- maintenance operations such as <command>VACUUM</> may affect physical
- distribution of data. The <function>setseed()</> function will not
- affect the sampling result when the <literal>REPEATABLE</literal>
- parameter is used.
- </para>
- </listitem>
- </varlistentry>
-
- <varlistentry>
<term><replaceable class="parameter">alias</replaceable></term>
<listitem>
<para>
@@ -388,6 +345,61 @@ TABLE [ ONLY ] <replaceable class="parameter">table_name</replaceable> [ * ]
</varlistentry>
<varlistentry>
+ <term><literal>TABLESAMPLE <replaceable class="parameter">sampling_method</replaceable> ( <replaceable class="parameter">argument</replaceable> [, ...] ) [ REPEATABLE ( <replaceable class="parameter">seed</replaceable> ) ]</literal></term>
+ <listitem>
+ <para>
+ A <literal>TABLESAMPLE</> clause after
+ a <replaceable class="parameter">table_name</> indicates that the
+ specified <replaceable class="parameter">sampling_method</replaceable>
+ should be used to retrieve a subset of the rows in that table.
+ This sampling precedes the application of any other filters such
+ as <literal>WHERE</> clauses.
+ The standard <productname>PostgreSQL</productname> distribution
+ includes two sampling methods, <literal>BERNOULLI</literal>
+ and <literal>SYSTEM</literal>, and other sampling methods can be
+ installed in the database via extensions.
+ </para>
+
+ <para>
+ The <literal>BERNOULLI</> and <literal>SYSTEM</> sampling methods
+ each accept a single <replaceable class="parameter">argument</>
+ which is the fraction of the table to sample, expressed as a
+ percentage between 0 and 100. This argument can be
+ any <type>real</>-valued expression. (Other sampling methods might
+ accept more or different arguments.) These two methods each return
+ a randomly-chosen sample of the table that will contain
+ approximately the specified percentage of the table's rows.
+ The <literal>BERNOULLI</literal> method scans the whole table and
+ selects or ignores individual rows independently with the specified
+ probability.
+ The <literal>SYSTEM</literal> method does block-level sampling with
+ each block having the specified chance of being selected; all rows
+ in each selected block are returned.
+ The <literal>SYSTEM</literal> method is significantly faster than
+ the <literal>BERNOULLI</literal> method when small sampling
+ percentages are specified, but it may return a less-random sample of
+ the table as a result of clustering effects.
+ </para>
+
+ <para>
+ The optional <literal>REPEATABLE</literal> clause specifies
+ a <replaceable class="parameter">seed</> number or expression to use
+ for generating random numbers within the sampling method. The seed
+ value can be any non-null floating-point value. Two queries that
+ specify the same seed and <replaceable class="parameter">argument</>
+ values will select the same sample of the table, if the table has
+ not been changed meanwhile. But different seed values will usually
+ produce different samples.
+ If <literal>REPEATABLE</literal> is not given then a new random
+ sample is selected for each query.
+ Note that some add-on sampling methods do not
+ accept <literal>REPEATABLE</literal>, and will always produce new
+ samples on each use.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
<term><replaceable class="parameter">select</replaceable></term>
<listitem>
<para>
@@ -1871,6 +1883,16 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
</refsect2>
<refsect2>
+ <title><literal>TABLESAMPLE</literal> Clause Restrictions</title>
+
+ <para>
+ The <literal>TABLESAMPLE</> clause is currently accepted only on
+ regular tables and materialized views. According to the SQL standard
+ it should be possible to apply it to any <literal>FROM</> item.
+ </para>
+ </refsect2>
+
+ <refsect2>
<title>Function Calls in <literal>FROM</literal></title>
<para>
@@ -1993,19 +2015,5 @@ SELECT distributors.* WHERE distributors.name = 'Westward';
</para>
</refsect2>
- <refsect2>
- <title><literal>TABLESAMPLE</literal> clause</title>
-
- <para>
- The <literal>TABLESAMPLE</> clause is currently accepted only on physical
- relations and materialized views.
- </para>
-
- <para>
- Additional modules allow you to install custom sampling methods and use
- them instead of the SQL standard methods.
- </para>
- </refsect2>
-
</refsect1>
</refentry>
diff --git a/doc/src/sgml/tablesample-method.sgml b/doc/src/sgml/tablesample-method.sgml
index 48eb7fe84ea..22f8bbe19aa 100644
--- a/doc/src/sgml/tablesample-method.sgml
+++ b/doc/src/sgml/tablesample-method.sgml
@@ -1,139 +1,301 @@
<!-- doc/src/sgml/tablesample-method.sgml -->
<chapter id="tablesample-method">
- <title>Writing A TABLESAMPLE Sampling Method</title>
+ <title>Writing A Table Sampling Method</title>
<indexterm zone="tablesample-method">
- <primary>tablesample method</primary>
+ <primary>table sampling method</primary>
+ </indexterm>
+
+ <indexterm zone="tablesample-method">
+ <primary><literal>TABLESAMPLE</literal> method</primary>
</indexterm>
<para>
- The <command>TABLESAMPLE</command> clause implementation in
- <productname>PostgreSQL</> supports creating a custom sampling methods.
- These methods control what sample of the table will be returned when the
- <command>TABLESAMPLE</command> clause is used.
+ <productname>PostgreSQL</>'s implementation of the <literal>TABLESAMPLE</>
+ clause supports custom table sampling methods, in addition to
+ the <literal>BERNOULLI</> and <literal>SYSTEM</> methods that are required
+ by the SQL standard. The sampling method determines which rows of the
+ table will be selected when the <literal>TABLESAMPLE</> clause is used.
</para>
- <sect1 id="tablesample-method-functions">
- <title>Tablesample Method Functions</title>
+ <para>
+ At the SQL level, a table sampling method is represented by a single SQL
+ function, typically implemented in C, having the signature
+<programlisting>
+method_name(internal) RETURNS tsm_handler
+</programlisting>
+ The name of the function is the same method name appearing in the
+ <literal>TABLESAMPLE</> clause. The <type>internal</> argument is a dummy
+ (always having value zero) that simply serves to prevent this function from
+ being called directly from a SQL command.
+ The result of the function must be a palloc'd struct of
+ type <type>TsmRoutine</>, which contains pointers to support functions for
+ the sampling method. These support functions are plain C functions and
+ are not visible or callable at the SQL level. The support functions are
+ described in <xref linkend="tablesample-support-functions">.
+ </para>
+
+ <para>
+ In addition to function pointers, the <type>TsmRoutine</> struct must
+ provide these additional fields:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><literal>List *parameterTypes</literal></term>
+ <listitem>
+ <para>
+ This is an OID list containing the data type OIDs of the parameter(s)
+ that will be accepted by the <literal>TABLESAMPLE</> clause when this
+ sampling method is used. For example, for the built-in methods, this
+ list contains a single item with value <literal>FLOAT4OID</>, which
+ represents the sampling percentage. Custom sampling methods can have
+ more or different parameters.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>bool repeatable_across_queries</literal></term>
+ <listitem>
+ <para>
+ If <literal>true</>, the sampling method can deliver identical samples
+ across successive queries, if the same parameters
+ and <literal>REPEATABLE</> seed value are supplied each time and the
+ table contents have not changed. When this is <literal>false</>,
+ the <literal>REPEATABLE</> clause is not accepted for use with the
+ sampling method.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><literal>bool repeatable_across_scans</literal></term>
+ <listitem>
+ <para>
+ If <literal>true</>, the sampling method can deliver identical samples
+ across successive scans in the same query (assuming unchanging
+ parameters, seed value, and snapshot).
+ When this is <literal>false</>, the planner will not select plans that
+ would require scanning the sampled table more than once, since that
+ might result in inconsistent query output.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The <type>TsmRoutine</> struct type is declared
+ in <filename>src/include/access/tsmapi.h</>, which see for additional
+ details.
+ </para>
+
+ <para>
+ The table sampling methods included in the standard distribution are good
+ references when trying to write your own. Look into
+ the <filename>src/backend/access/tablesample</> subdirectory of the source
+ tree for the built-in sampling methods, and into the <filename>contrib</>
+ subdirectory for add-on methods.
+ </para>
+
+ <sect1 id="tablesample-support-functions">
+ <title>Sampling Method Support Functions</title>
<para>
- The tablesample method must provide following set of functions:
+ The TSM handler function returns a palloc'd <type>TsmRoutine</> struct
+ containing pointers to the support functions described below. Most of
+ the functions are required, but some are optional, and those pointers can
+ be NULL.
</para>
<para>
<programlisting>
void
-tsm_init (TableSampleDesc *desc,
- uint32 seed, ...);
+SampleScanGetSampleSize (PlannerInfo *root,
+ RelOptInfo *baserel,
+ List *paramexprs,
+ BlockNumber *pages,
+ double *tuples);
</programlisting>
- Initialize the tablesample scan. The function is called at the beginning
- of each relation scan.
+
+ This function is called during planning. It must estimate the number of
+ relation pages that will be read during a sample scan, and the number of
+ tuples that will be selected by the scan. (For example, these might be
+ determined by estimating the sampling fraction, and then multiplying
+ the <literal>baserel-&gt;pages</> and <literal>baserel-&gt;tuples</>
+ numbers by that, being sure to round the results to integral values.)
+ The <literal>paramexprs</> list holds the expression(s) that are
+ parameters to the <literal>TABLESAMPLE</> clause. It is recommended to
+ use <function>estimate_expression_value()</> to try to reduce these
+ expressions to constants, if their values are needed for estimation
+ purposes; but the function must provide size estimates even if they cannot
+ be reduced, and it should not fail even if the values appear invalid
+ (remember that they're only estimates of what the run-time values will be).
+ The <literal>pages</> and <literal>tuples</> parameters are outputs.
</para>
+
<para>
- Note that the first two parameters are required but you can specify
- additional parameters which then will be used by the <command>TABLESAMPLE</>
- clause to determine the required user input in the query itself.
- This means that if your function will specify additional float4 parameter
- named percent, the user will have to call the tablesample method with
- expression which evaluates (or can be coerced) to float4.
- For example this definition:
<programlisting>
-tsm_init (TableSampleDesc *desc,
- uint32 seed, float4 pct);
-</programlisting>
-Will lead to SQL call like this:
-<programlisting>
-... TABLESAMPLE yourmethod(0.5) ...
+void
+InitSampleScan (SampleScanState *node,
+ int eflags);
</programlisting>
+
+ Initialize for execution of a SampleScan plan node.
+ This is called during executor startup.
+ It should perform any initialization needed before processing can start.
+ The <structname>SampleScanState</> node has already been created, but
+ its <structfield>tsm_state</> field is NULL.
+ The <function>InitSampleScan</> function can palloc whatever internal
+ state data is needed by the sampling method, and store a pointer to
+ it in <literal>node-&gt;tsm_state</>.
+ Information about the table to scan is accessible through other fields
+ of the <structname>SampleScanState</> node (but note that the
+ <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+ up yet).
+ <literal>eflags</> contains flag bits describing the executor's
+ operating mode for this plan node.
</para>
<para>
-<programlisting>
-BlockNumber
-tsm_nextblock (TableSampleDesc *desc);
-</programlisting>
- Returns the block number of next page to be scanned. InvalidBlockNumber
- should be returned if the sampling has reached end of the relation.
+ When <literal>(eflags &amp; EXEC_FLAG_EXPLAIN_ONLY)</> is true,
+ the scan will not actually be performed, so this function should only do
+ the minimum required to make the node state valid for <command>EXPLAIN</>
+ and <function>EndSampleScan</>.
</para>
<para>
-<programlisting>
-OffsetNumber
-tsm_nexttuple (TableSampleDesc *desc, BlockNumber blockno,
- OffsetNumber maxoffset);
-</programlisting>
- Return next tuple offset for the current page. InvalidOffsetNumber should
- be returned if the sampling has reached end of the page.
+ This function can be omitted (set the pointer to NULL), in which case
+ <function>BeginSampleScan</> must perform all initialization needed
+ by the sampling method.
</para>
<para>
<programlisting>
void
-tsm_end (TableSampleDesc *desc);
+BeginSampleScan (SampleScanState *node,
+ Datum *params,
+ int nparams,
+ uint32 seed);
</programlisting>
- The scan has finished, cleanup any left over state.
+
+ Begin execution of a sampling scan.
+ This is called just before the first attempt to fetch a tuple, and
+ may be called again if the scan needs to be restarted.
+ Information about the table to scan is accessible through fields
+ of the <structname>SampleScanState</> node (but note that the
+ <literal>node-&gt;ss.ss_currentScanDesc</> scan descriptor is not set
+ up yet).
+ The <literal>params</> array, of length <literal>nparams</>, contains the
+ values of the parameters supplied in the <literal>TABLESAMPLE</> clause.
+ These will have the number and types specified in the sampling
+ method's <literal>parameterTypes</literal> list, and have been checked
+ to not be null.
+ <literal>seed</> contains a seed to use for any random numbers generated
+ within the sampling method; it is either a hash derived from the
+ <literal>REPEATABLE</> value if one was given, or the result
+ of <literal>random()</> if not.
</para>
<para>
-<programlisting>
-void
-tsm_reset (TableSampleDesc *desc);
-</programlisting>
- The scan needs to rescan the relation again, reset any tablesample method
- state.
+ This function may adjust the fields <literal>node-&gt;use_bulkread</>
+ and <literal>node-&gt;use_pagemode</>.
+ If <literal>node-&gt;use_bulkread</> is <literal>true</>, which it is by
+ default, the scan will use a buffer access strategy that encourages
+ recycling buffers after use. It might be reasonable to set this
+ to <literal>false</> if the scan will visit only a small fraction of the
+ table's pages.
+ If <literal>node-&gt;use_pagemode</> is <literal>true</>, which it is by
+ default, the scan will perform visibility checking in a single pass for
+ all tuples on each visited page. It might be reasonable to set this
+ to <literal>false</> if the scan will select only a small fraction of the
+ tuples on each visited page. That will result in fewer tuple visibility
+ checks being performed, though each one will be more expensive because it
+ will require more locking.
+ </para>
+
+ <para>
+ If the sampling method is
+ marked <literal>repeatable_across_scans</literal>, it must be able to
+ select the same set of tuples during a rescan as it did originally, that is
+ a fresh call of <function>BeginSampleScan</> must lead to selecting the
+ same tuples as before (if the <literal>TABLESAMPLE</> parameters
+ and seed don't change).
</para>
<para>
<programlisting>
-void
-tsm_cost (PlannerInfo *root, Path *path, RelOptInfo *baserel,
- List *args, BlockNumber *pages, double *tuples);
+BlockNumber
+NextSampleBlock (SampleScanState *node);
</programlisting>
- This function is used by optimizer to decide best plan and is also used
- for output of <command>EXPLAIN</>.
+
+ Returns the block number of the next page to be scanned, or
+ <literal>InvalidBlockNumber</> if no pages remain to be scanned.
</para>
<para>
- There is one more function which tablesampling method can implement in order
- to gain more fine grained control over sampling. This function is optional:
+ This function can be omitted (set the pointer to NULL), in which case
+ the core code will perform a sequential scan of the entire relation.
+ Such a scan can use synchronized scanning, so that the sampling method
+ cannot assume that the relation pages are visited in the same order on
+ each scan.
</para>
<para>
<programlisting>
-bool
-tsm_examinetuple (TableSampleDesc *desc, BlockNumber blockno,
- HeapTuple tuple, bool visible);
+OffsetNumber
+NextSampleTuple (SampleScanState *node,
+ BlockNumber blockno,
+ OffsetNumber maxoffset);
</programlisting>
- Function that enables the sampling method to examine contents of the tuple
- (for example to collect some internal statistics). The return value of this
- function is used to determine if the tuple should be returned to client.
- Note that this function will receive even invisible tuples but it is not
- allowed to return true for such tuple (if it does,
- <productname>PostgreSQL</> will raise an error).
+
+ Returns the offset number of the next tuple to be sampled on the
+ specified page, or <literal>InvalidOffsetNumber</> if no tuples remain to
+ be sampled. <literal>maxoffset</> is the largest offset number in use
+ on the page.
</para>
+ <note>
+ <para>
+ <function>NextSampleTuple</> is not explicitly told which of the offset
+ numbers in the range <literal>1 .. maxoffset</> actually contain valid
+ tuples. This is not normally a problem since the core code ignores
+ requests to sample missing or invisible tuples; that should not result in
+ any bias in the sample. However, if necessary, the function can
+ examine <literal>node-&gt;ss.ss_currentScanDesc-&gt;rs_vistuples[]</>
+ to identify which tuples are valid and visible. (This
+ requires <literal>node-&gt;use_pagemode</> to be <literal>true</>.)
+ </para>
+ </note>
+
+ <note>
+ <para>
+ <function>NextSampleTuple</> must <emphasis>not</> assume
+ that <literal>blockno</> is the same page number returned by the most
+ recent <function>NextSampleBlock</> call. It was returned by some
+ previous <function>NextSampleBlock</> call, but the core code is allowed
+ to call <function>NextSampleBlock</> in advance of actually scanning
+ pages, so as to support prefetching. It is OK to assume that once
+ sampling of a given page begins, successive <function>NextSampleTuple</>
+ calls all refer to the same page until <literal>InvalidOffsetNumber</> is
+ returned.
+ </para>
+ </note>
+
<para>
- As you can see most of the tablesample method interfaces get the
- <structname>TableSampleDesc</> as a first parameter. This structure holds
- state of the current scan and also provides storage for the tablesample
- method's state. It is defined as following:
<programlisting>
-typedef struct TableSampleDesc {
- HeapScanDesc heapScan;
- TupleDesc tupDesc;
-
- void *tsmdata;
-} TableSampleDesc;
+void
+EndSampleScan (SampleScanState *node);
</programlisting>
- Where <structfield>heapScan</> is the descriptor of the physical table scan.
- It's possible to get table size info from it. The <structfield>tupDesc</>
- represents the tuple descriptor of the tuples returned by the scan and passed
- to the <function>tsm_examinetuple()</> interface. The <structfield>tsmdata</>
- can be used by tablesample method itself to store any state info it might
- need during the scan. If used by the method, it should be <function>pfree</>d
- in <function>tsm_end()</> function.
+
+ End the scan and release resources. It is normally not important
+ to release palloc'd memory, but any externally-visible resources
+ should be cleaned up.
+ This function can be omitted (set the pointer to NULL) in the common
+ case where no such resources exist.
</para>
+
</sect1>
</chapter>
diff --git a/doc/src/sgml/tsm-system-rows.sgml b/doc/src/sgml/tsm-system-rows.sgml
index 0c2f1779c9a..93aa5366649 100644
--- a/doc/src/sgml/tsm-system-rows.sgml
+++ b/doc/src/sgml/tsm-system-rows.sgml
@@ -8,24 +8,37 @@
</indexterm>
<para>
- The <filename>tsm_system_rows</> module provides the tablesample method
- <literal>SYSTEM_ROWS</literal>, which can be used inside the
- <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+ The <filename>tsm_system_rows</> module provides the table sampling method
+ <literal>SYSTEM_ROWS</literal>, which can be used in
+ the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+ command.
</para>
<para>
- This tablesample method uses a linear probing algorithm to read sample
- of a table and uses actual number of rows as limit (unlike the
- <literal>SYSTEM</literal> tablesample method which limits by percentage
- of a table).
+ This table sampling method accepts a single integer argument that is the
+ maximum number of rows to read. The resulting sample will always contain
+ exactly that many rows, unless the table does not contain enough rows, in
+ which case the whole table is selected.
+ </para>
+
+ <para>
+ Like the built-in <literal>SYSTEM</literal> sampling
+ method, <literal>SYSTEM_ROWS</literal> performs block-level sampling, so
+ that the sample is not completely random but may be subject to clustering
+ effects, especially if only a small number of rows are requested.
+ </para>
+
+ <para>
+ <literal>SYSTEM_ROWS</literal> does not support
+ the <literal>REPEATABLE</literal> clause.
</para>
<sect2>
<title>Examples</title>
<para>
- Here is an example of selecting sample of a table with
- <literal>SYSTEM_ROWS</>. First install the extension:
+ Here is an example of selecting a sample of a table with
+ <literal>SYSTEM_ROWS</>. First install the extension:
</para>
<programlisting>
@@ -33,8 +46,7 @@ CREATE EXTENSION tsm_system_rows;
</programlisting>
<para>
- Then you can use it in <command>SELECT</command> command same way as other
- tablesample methods:
+ Then you can use it in a <command>SELECT</command> command, for instance:
<programlisting>
SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
@@ -42,8 +54,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_ROWS(100);
</para>
<para>
- The above command will return a sample of 100 rows from the table my_table
- (less if the table does not have 100 visible rows).
+ This command will return a sample of 100 rows from the
+ table <structname>my_table</> (unless the table does not have 100
+ visible rows, in which case all its rows are returned).
</para>
</sect2>
diff --git a/doc/src/sgml/tsm-system-time.sgml b/doc/src/sgml/tsm-system-time.sgml
index 2343ab16d4f..3f8ff1a026f 100644
--- a/doc/src/sgml/tsm-system-time.sgml
+++ b/doc/src/sgml/tsm-system-time.sgml
@@ -8,25 +8,39 @@
</indexterm>
<para>
- The <filename>tsm_system_time</> module provides the tablesample method
- <literal>SYSTEM_TIME</literal>, which can be used inside the
- <command>TABLESAMPLE</command> clause of a <command>SELECT</command>.
+ The <filename>tsm_system_time</> module provides the table sampling method
+ <literal>SYSTEM_TIME</literal>, which can be used in
+ the <literal>TABLESAMPLE</> clause of a <xref linkend="sql-select">
+ command.
</para>
<para>
- This tablesample method uses a linear probing algorithm to read sample
- of a table and uses time in milliseconds as limit (unlike the
- <literal>SYSTEM</literal> tablesample method which limits by percentage
- of a table). This gives you some control over the length of execution
- of your query.
+ This table sampling method accepts a single floating-point argument that
+ is the maximum number of milliseconds to spend reading the table. This
+ gives you direct control over how long the query takes, at the price that
+ the size of the sample becomes hard to predict. The resulting sample will
+ contain as many rows as could be read in the specified time, unless the
+ whole table has been read first.
+ </para>
+
+ <para>
+ Like the built-in <literal>SYSTEM</literal> sampling
+ method, <literal>SYSTEM_TIME</literal> performs block-level sampling, so
+ that the sample is not completely random but may be subject to clustering
+ effects, especially if only a small number of rows are selected.
+ </para>
+
+ <para>
+ <literal>SYSTEM_TIME</literal> does not support
+ the <literal>REPEATABLE</literal> clause.
</para>
<sect2>
<title>Examples</title>
<para>
- Here is an example of selecting sample of a table with
- <literal>SYSTEM_TIME</>. First install the extension:
+ Here is an example of selecting a sample of a table with
+ <literal>SYSTEM_TIME</>. First install the extension:
</para>
<programlisting>
@@ -34,8 +48,7 @@ CREATE EXTENSION tsm_system_time;
</programlisting>
<para>
- Then you can use it in a <command>SELECT</command> command the same way as
- other tablesample methods:
+ Then you can use it in a <command>SELECT</command> command, for instance:
<programlisting>
SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
@@ -43,8 +56,9 @@ SELECT * FROM my_table TABLESAMPLE SYSTEM_TIME(1000);
</para>
<para>
- The above command will return as large a sample of my_table as it can read in
- 1 second (or less if it reads whole table faster).
+ This command will return as large a sample of <structname>my_table</> as
+ it can read in 1 second (1000 milliseconds). Of course, if the whole
+ table can be read in under 1 second, all its rows will be returned.
</para>
</sect2>