diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2019-04-05 19:20:30 -0400 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2019-04-05 19:20:43 -0400 |
| commit | 959d00e9dbe4cfcf4a63bb655ac2c29a5e579246 (patch) | |
| tree | 4c260f752780d317d1f2ebab8aae553dc4dc8236 /src/include | |
| parent | 9f06d79ef831ffa333f908f6d3debdb654292414 (diff) | |
Use Append rather than MergeAppend for scanning ordered partitions.
If we need ordered output from a scan of a partitioned table, but
the ordering matches the partition ordering, then we don't need to
use a MergeAppend to combine the pre-ordered per-partition scan
results: a plain Append will produce the same results. This
both saves useless comparison work inside the MergeAppend proper,
and allows us to start returning tuples after istarting up just
the first child node not all of them.
However, all is not peaches and cream, because if some of the
child nodes have high startup costs then there will be big
discontinuities in the tuples-returned-versus-elapsed-time curve.
The planner's cost model cannot handle that (yet, anyway).
If we model the Append's startup cost as being just the first
child's startup cost, we may drastically underestimate the cost
of fetching slightly more tuples than are available from the first
child. Since we've had bad experiences with over-optimistic choices
of "fast start" plans for ORDER BY LIMIT queries, that seems scary.
As a klugy workaround, set the startup cost estimate for an ordered
Append to be the sum of its children's startup costs (as MergeAppend
would). This doesn't really describe reality, but it's less likely
to cause a bad plan choice than an underestimated startup cost would.
In practice, the cases where we really care about this optimization
will have child plans that are IndexScans with zero startup cost,
so that the overly conservative estimate is still just zero.
David Rowley, reviewed by Julien Rouhaud and Antonin Houska
Discussion: https://postgr.es/m/CAKJS1f-hAqhPLRk_RaSFTgYxd=Tz5hA7kQ2h4-DhJufQk8TGuw@mail.gmail.com
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/nodes/pathnodes.h | 9 | ||||
| -rw-r--r-- | src/include/optimizer/pathnode.h | 2 | ||||
| -rw-r--r-- | src/include/optimizer/paths.h | 2 | ||||
| -rw-r--r-- | src/include/partitioning/partbounds.h | 1 |
4 files changed, 11 insertions, 3 deletions
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 15314a8cfe0..f8429df9e1f 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -280,6 +280,11 @@ struct PlannerInfo List *join_info_list; /* list of SpecialJoinInfos */ + /* + * Note: for AppendRelInfos describing partitions of a partitioned table, + * we guarantee that partitions that come earlier in the partitioned + * table's PartitionDesc will appear earlier in append_rel_list. + */ List *append_rel_list; /* list of AppendRelInfos */ List *rowMarks; /* list of PlanRowMarks */ @@ -1363,9 +1368,9 @@ typedef struct AppendPath /* RT indexes of non-leaf tables in a partition tree */ List *partitioned_rels; List *subpaths; /* list of component Paths */ - - /* Index of first partial path in subpaths */ + /* Index of first partial path in subpaths; list_length(subpaths) if none */ int first_partial_path; + double limit_tuples; /* hard limit on output tuples, or -1 */ } AppendPath; #define IS_DUMMY_APPEND(p) \ diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 5b577c12eca..437250f557e 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -65,7 +65,7 @@ extern TidPath *create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals, Relids required_outer); extern AppendPath *create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths, List *partial_subpaths, - Relids required_outer, + List *pathkeys, Relids required_outer, int parallel_workers, bool parallel_aware, List *partitioned_rels, double rows); extern MergeAppendPath *create_merge_append_path(PlannerInfo *root, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 36d12bc3764..0e858097c87 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -194,6 +194,8 @@ extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths, extern Path *get_cheapest_parallel_safe_total_inner(List *paths); extern List *build_index_pathkeys(PlannerInfo *root, IndexOptInfo *index, ScanDirection scandir); +extern List *build_partition_pathkeys(PlannerInfo *root, RelOptInfo *partrel, + ScanDirection scandir, bool *partialkeys); extern List *build_expression_pathkey(PlannerInfo *root, Expr *expr, Relids nullable_relids, Oid opno, Relids rel, bool create_it); diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h index 683e1574eae..c954965e4ef 100644 --- a/src/include/partitioning/partbounds.h +++ b/src/include/partitioning/partbounds.h @@ -88,6 +88,7 @@ extern bool partition_bounds_equal(int partnatts, int16 *parttyplen, PartitionBoundInfo b2); extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src, PartitionKey key); +extern bool partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts); extern void check_new_partition_bound(char *relname, Relation parent, PartitionBoundSpec *spec); extern void check_default_partition_contents(Relation parent, |
