summaryrefslogtreecommitdiff
path: root/src/include/nodes/relation.h
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2017-10-06 11:11:10 -0400
committerRobert Haas <rhaas@postgresql.org>2017-10-06 11:11:10 -0400
commitf49842d1ee31b976c681322f76025d7732e860f3 (patch)
treebc86f11819247980137e89404162ddc88200ac1c /src/include/nodes/relation.h
parentfe9ba28ee852bb968bc8948d172c6bc0c70c50df (diff)
Basic partition-wise join functionality.
Instead of joining two partitioned tables in their entirety we can, if it is an equi-join on the partition keys, join the matching partitions individually. This involves teaching the planner about "other join" rels, which are related to regular join rels in the same way that other member rels are related to baserels. This can use significantly more CPU time and memory than regular join planning, because there may now be a set of "other" rels not only for every base relation but also for every join relation. In most practical cases, this probably shouldn't be a problem, because (1) it's probably unusual to join many tables each with many partitions using the partition keys for all joins and (2) if you do that scenario then you probably have a big enough machine to handle the increased memory cost of planning and (3) the resulting plan is highly likely to be better, so what you spend in planning you'll make up on the execution side. All the same, for now, turn this feature off by default. Currently, we can only perform joins between two tables whose partitioning schemes are absolutely identical. It would be nice to cope with other scenarios, such as extra partitions on one side or the other with no match on the other side, but that will have to wait for a future patch. Ashutosh Bapat, reviewed and tested by Rajkumar Raghuwanshi, Amit Langote, Rafia Sabih, Thomas Munro, Dilip Kumar, Antonin Houska, Amit Khandekar, and by me. A few final adjustments by me. Discussion: http://postgr.es/m/CAFjFpRfQ8GrQvzp3jA2wnLqrHmaXna-urjm_UY9BqXj=EaDTSA@mail.gmail.com Discussion: http://postgr.es/m/CAFjFpRcitjfrULr5jfuKWRPsGUX0LQ0k8-yG0Qw2+1LBGNpMdw@mail.gmail.com
Diffstat (limited to 'src/include/nodes/relation.h')
-rw-r--r--src/include/nodes/relation.h50
1 files changed, 42 insertions, 8 deletions
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 48e6012f7fe..e085cefb7ba 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -391,6 +391,11 @@ typedef struct PartitionSchemeData *PartitionScheme;
* handling join alias Vars. Currently this is not needed because all join
* alias Vars are expanded to non-aliased form during preprocess_expression.
*
+ * We also have relations representing joins between child relations of
+ * different partitioned tables. These relations are not added to
+ * join_rel_level lists as they are not joined directly by the dynamic
+ * programming algorithm.
+ *
* There is also a RelOptKind for "upper" relations, which are RelOptInfos
* that describe post-scan/join processing steps, such as aggregation.
* Many of the fields in these RelOptInfos are meaningless, but their Path
@@ -525,14 +530,18 @@ typedef struct PartitionSchemeData *PartitionScheme;
* boundinfo - Partition bounds
* nparts - Number of partitions
* part_rels - RelOptInfos for each partition
- * partexprs - Partition key expressions
+ * partexprs, nullable_partexprs - Partition key expressions
*
* Note: A base relation always has only one set of partition keys, but a join
* relation may have as many sets of partition keys as the number of relations
- * being joined. partexprs is an array containing part_scheme->partnatts
- * elements, each of which is a list of partition key expressions. For a base
- * relation each list contains only one expression, but for a join relation
- * there can be one per baserel.
+ * being joined. partexprs and nullable_partexprs are arrays containing
+ * part_scheme->partnatts elements each. Each of these elements is a list of
+ * partition key expressions. For a base relation each list in partexprs
+ * contains only one expression and nullable_partexprs is not populated. For a
+ * join relation, partexprs and nullable_partexprs contain partition key
+ * expressions from non-nullable and nullable relations resp. Lists at any
+ * given position in those arrays together contain as many elements as the
+ * number of joining relations.
*----------
*/
typedef enum RelOptKind
@@ -540,6 +549,7 @@ typedef enum RelOptKind
RELOPT_BASEREL,
RELOPT_JOINREL,
RELOPT_OTHER_MEMBER_REL,
+ RELOPT_OTHER_JOINREL,
RELOPT_UPPER_REL,
RELOPT_DEADREL
} RelOptKind;
@@ -553,13 +563,17 @@ typedef enum RelOptKind
(rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
/* Is the given relation a join relation? */
-#define IS_JOIN_REL(rel) ((rel)->reloptkind == RELOPT_JOINREL)
+#define IS_JOIN_REL(rel) \
+ ((rel)->reloptkind == RELOPT_JOINREL || \
+ (rel)->reloptkind == RELOPT_OTHER_JOINREL)
/* Is the given relation an upper relation? */
#define IS_UPPER_REL(rel) ((rel)->reloptkind == RELOPT_UPPER_REL)
/* Is the given relation an "other" relation? */
-#define IS_OTHER_REL(rel) ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
+#define IS_OTHER_REL(rel) \
+ ((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \
+ (rel)->reloptkind == RELOPT_OTHER_JOINREL)
typedef struct RelOptInfo
{
@@ -645,10 +659,30 @@ typedef struct RelOptInfo
struct PartitionBoundInfoData *boundinfo; /* Partition bounds */
struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions,
* stored in the same order of bounds */
- List **partexprs; /* Partition key expressions. */
+ List **partexprs; /* Non-nullable partition key expressions. */
+ List **nullable_partexprs; /* Nullable partition key expressions. */
} RelOptInfo;
/*
+ * Is given relation partitioned?
+ *
+ * A join between two partitioned relations with same partitioning scheme
+ * without any matching partitions will not have any partition in it but will
+ * have partition scheme set. So a relation is deemed to be partitioned if it
+ * has a partitioning scheme, bounds and positive number of partitions.
+ */
+#define IS_PARTITIONED_REL(rel) \
+ ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0)
+
+/*
+ * Convenience macro to make sure that a partitioned relation has all the
+ * required members set.
+ */
+#define REL_HAS_ALL_PART_PROPS(rel) \
+ ((rel)->part_scheme && (rel)->boundinfo && (rel)->nparts > 0 && \
+ (rel)->part_rels && (rel)->partexprs && (rel)->nullable_partexprs)
+
+/*
* IndexOptInfo
* Per-index information for planning/optimization
*