summaryrefslogtreecommitdiff
path: root/src/backend/partitioning/partbounds.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/partitioning/partbounds.c')
-rw-r--r--src/backend/partitioning/partbounds.c627
1 files changed, 623 insertions, 4 deletions
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index c94f73aadc1..0b5e0dd89f5 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -36,6 +36,61 @@
#include "utils/ruleutils.h"
#include "utils/syscache.h"
+/*
+ * When qsort'ing partition bounds after reading from the catalog, each bound
+ * is represented with one of the following structs.
+ */
+
+/* One bound of a hash partition */
+typedef struct PartitionHashBound
+{
+ int modulus;
+ int remainder;
+ int index;
+} PartitionHashBound;
+
+/* One value coming from some (index'th) list partition */
+typedef struct PartitionListValue
+{
+ int index;
+ Datum value;
+} PartitionListValue;
+
+/* One bound of a range partition */
+typedef struct PartitionRangeBound
+{
+ int index;
+ Datum *datums; /* range bound datums */
+ PartitionRangeDatumKind *kind; /* the kind of each datum */
+ bool lower; /* this is the lower (vs upper) bound */
+} PartitionRangeBound;
+
+static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
+static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
+ void *arg);
+static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
+ void *arg);
+static PartitionBoundInfo create_hash_bounds(List *boundspecs,
+ PartitionKey key,
+ int **mapping);
+static PartitionBoundInfo create_list_bounds(List *boundspecs,
+ PartitionKey key,
+ int **mapping);
+static PartitionBoundInfo create_range_bounds(List *boundspecs,
+ PartitionKey key,
+ int **mapping);
+static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index,
+ List *datums, bool lower);
+static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
+ int remainder2);
+static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
+ Oid *partcollation, Datum *datums1,
+ PartitionRangeDatumKind *kind1, bool lower1,
+ PartitionRangeBound *b2);
+static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
+ Oid *partcollation,
+ PartitionBoundInfo boundinfo,
+ PartitionRangeBound *probe, bool *is_equal);
static int get_partition_bound_num_indexes(PartitionBoundInfo b);
static Expr *make_partition_op_expr(PartitionKey key, int keynum,
uint16 strategy, Expr *arg1, Expr *arg2);
@@ -93,6 +148,521 @@ get_qual_from_partbound(Relation rel, Relation parent,
}
/*
+ * partition_bounds_create
+ * Build a PartitionBoundInfo struct from a list of PartitionBoundSpec
+ * nodes
+ *
+ * This function creates a PartitionBoundInfo and fills the values of its
+ * various members based on the input list. Importantly, 'datums' array will
+ * contain Datum representation of individual bounds (possibly after
+ * de-duplication as in case of range bounds), sorted in a canonical order
+ * defined by qsort_partition_* functions of respective partitioning methods.
+ * 'indexes' array will contain as many elements as there are bounds (specific
+ * exceptions to this rule are listed in the function body), which represent
+ * the 0-based canonical positions of partitions.
+ *
+ * Upon return from this function, *mapping is set to an array of
+ * list_length(boundspecs) elements, each of which maps the original index of
+ * a partition to its canonical index.
+ *
+ * Note: The objects returned by this function are wholly allocated in the
+ * current memory context.
+ */
+PartitionBoundInfo
+partition_bounds_create(List *boundspecs, PartitionKey key, int **mapping)
+{
+ int nparts = list_length(boundspecs);
+ int i;
+
+ Assert(nparts > 0);
+
+ /*
+ * For each partitioning method, we first convert the partition bounds
+ * from their parser node representation to the internal representation,
+ * along with any additional preprocessing (such as de-duplicating range
+ * bounds). Resulting bound datums are then added to the 'datums' array
+ * in PartitionBoundInfo. For each datum added, an integer indicating the
+ * canonical partition index is added to the 'indexes' array.
+ *
+ * For each bound, we remember its partition's position (0-based) in the
+ * original list to later map it to the canonical index.
+ */
+
+ /*
+ * Initialize mapping array with invalid values, this is filled within
+ * each sub-routine below depending on the bound type.
+ */
+ *mapping = (int *) palloc(sizeof(int) * nparts);
+ for (i = 0; i < nparts; i++)
+ (*mapping)[i] = -1;
+
+ switch (key->strategy)
+ {
+ case PARTITION_STRATEGY_HASH:
+ return create_hash_bounds(boundspecs, key, mapping);
+
+ case PARTITION_STRATEGY_LIST:
+ return create_list_bounds(boundspecs, key, mapping);
+
+ case PARTITION_STRATEGY_RANGE:
+ return create_range_bounds(boundspecs, key, mapping);
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) key->strategy);
+ break;
+ }
+
+ Assert(false);
+ return NULL; /* keep compiler quiet */
+}
+
+/*
+ * create_hash_bounds
+ * Create a PartitionBoundInfo for a hash partitioned table
+ */
+static PartitionBoundInfo
+create_hash_bounds(List *boundspecs, PartitionKey key, int **mapping)
+{
+ PartitionBoundInfo boundinfo;
+ PartitionHashBound **hbounds = NULL;
+ ListCell *cell;
+ int i,
+ nparts = list_length(boundspecs);
+ int ndatums = 0;
+ int greatest_modulus;
+
+ boundinfo = (PartitionBoundInfoData *)
+ palloc0(sizeof(PartitionBoundInfoData));
+ boundinfo->strategy = key->strategy;
+ /* No special hash partitions. */
+ boundinfo->null_index = -1;
+ boundinfo->default_index = -1;
+
+ ndatums = nparts;
+ hbounds = (PartitionHashBound **)
+ palloc(nparts * sizeof(PartitionHashBound *));
+
+ /* Convert from node to the internal representation */
+ i = 0;
+ foreach(cell, boundspecs)
+ {
+ PartitionBoundSpec *spec = castNode(PartitionBoundSpec, lfirst(cell));
+
+ if (spec->strategy != PARTITION_STRATEGY_HASH)
+ elog(ERROR, "invalid strategy in partition bound spec");
+
+ hbounds[i] = (PartitionHashBound *) palloc(sizeof(PartitionHashBound));
+ hbounds[i]->modulus = spec->modulus;
+ hbounds[i]->remainder = spec->remainder;
+ hbounds[i]->index = i;
+ i++;
+ }
+
+ /* Sort all the bounds in ascending order */
+ qsort(hbounds, nparts, sizeof(PartitionHashBound *),
+ qsort_partition_hbound_cmp);
+
+ /* After sorting, moduli are now stored in ascending order. */
+ greatest_modulus = hbounds[ndatums - 1]->modulus;
+
+ boundinfo->ndatums = ndatums;
+ boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+ boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
+ for (i = 0; i < greatest_modulus; i++)
+ boundinfo->indexes[i] = -1;
+
+ /*
+ * For hash partitioning, there are as many datums (modulus and remainder
+ * pairs) as there are partitions. Indexes are simply values ranging from
+ * 0 to (nparts - 1).
+ */
+ for (i = 0; i < nparts; i++)
+ {
+ int modulus = hbounds[i]->modulus;
+ int remainder = hbounds[i]->remainder;
+
+ boundinfo->datums[i] = (Datum *) palloc(2 * sizeof(Datum));
+ boundinfo->datums[i][0] = Int32GetDatum(modulus);
+ boundinfo->datums[i][1] = Int32GetDatum(remainder);
+
+ while (remainder < greatest_modulus)
+ {
+ /* overlap? */
+ Assert(boundinfo->indexes[remainder] == -1);
+ boundinfo->indexes[remainder] = i;
+ remainder += modulus;
+ }
+
+ (*mapping)[hbounds[i]->index] = i;
+ pfree(hbounds[i]);
+ }
+ pfree(hbounds);
+
+ return boundinfo;
+}
+
+/*
+ * create_list_bounds
+ * Create a PartitionBoundInfo for a list partitioned table
+ */
+static PartitionBoundInfo
+create_list_bounds(List *boundspecs, PartitionKey key, int **mapping)
+{
+ PartitionBoundInfo boundinfo;
+ PartitionListValue **all_values = NULL;
+ ListCell *cell;
+ int i = 0;
+ int ndatums = 0;
+ int next_index = 0;
+ int default_index = -1;
+ int null_index = -1;
+ List *non_null_values = NIL;
+
+ boundinfo = (PartitionBoundInfoData *)
+ palloc0(sizeof(PartitionBoundInfoData));
+ boundinfo->strategy = key->strategy;
+ /* Will be set correctly below. */
+ boundinfo->null_index = -1;
+ boundinfo->default_index = -1;
+
+ /* Create a unified list of non-null values across all partitions. */
+ foreach(cell, boundspecs)
+ {
+ PartitionBoundSpec *spec = castNode(PartitionBoundSpec, lfirst(cell));
+ ListCell *c;
+
+ if (spec->strategy != PARTITION_STRATEGY_LIST)
+ elog(ERROR, "invalid strategy in partition bound spec");
+
+ /*
+ * Note the index of the partition bound spec for the default
+ * partition. There's no datum to add to the list on non-null datums
+ * for this partition.
+ */
+ if (spec->is_default)
+ {
+ default_index = i;
+ i++;
+ continue;
+ }
+
+ foreach(c, spec->listdatums)
+ {
+ Const *val = castNode(Const, lfirst(c));
+ PartitionListValue *list_value = NULL;
+
+ if (!val->constisnull)
+ {
+ list_value = (PartitionListValue *)
+ palloc0(sizeof(PartitionListValue));
+ list_value->index = i;
+ list_value->value = val->constvalue;
+ }
+ else
+ {
+ /*
+ * Never put a null into the values array, flag instead for
+ * the code further down below where we construct the actual
+ * relcache struct.
+ */
+ if (null_index != -1)
+ elog(ERROR, "found null more than once");
+ null_index = i;
+ }
+
+ if (list_value)
+ non_null_values = lappend(non_null_values, list_value);
+ }
+
+ i++;
+ }
+
+ ndatums = list_length(non_null_values);
+
+ /*
+ * Collect all list values in one array. Alongside the value, we also save
+ * the index of partition the value comes from.
+ */
+ all_values = (PartitionListValue **)
+ palloc(ndatums * sizeof(PartitionListValue *));
+ i = 0;
+ foreach(cell, non_null_values)
+ {
+ PartitionListValue *src = lfirst(cell);
+
+ all_values[i] = (PartitionListValue *)
+ palloc(sizeof(PartitionListValue));
+ all_values[i]->value = src->value;
+ all_values[i]->index = src->index;
+ i++;
+ }
+
+ qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
+ qsort_partition_list_value_cmp, (void *) key);
+
+ boundinfo->ndatums = ndatums;
+ boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+ boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
+
+ /*
+ * Copy values. Canonical indexes are values ranging from 0 to (nparts -
+ * 1) assigned to each partition such that all datums of a given partition
+ * receive the same value. The value for a given partition is the index of
+ * that partition's smallest datum in the all_values[] array.
+ */
+ for (i = 0; i < ndatums; i++)
+ {
+ int orig_index = all_values[i]->index;
+
+ boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
+ boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
+ key->parttypbyval[0],
+ key->parttyplen[0]);
+
+ /* If the old index has no mapping, assign one */
+ if ((*mapping)[orig_index] == -1)
+ (*mapping)[orig_index] = next_index++;
+
+ boundinfo->indexes[i] = (*mapping)[orig_index];
+ }
+
+ /*
+ * Set the canonical value for null_index, if any.
+ *
+ * It is possible that the null-accepting partition has not been assigned
+ * an index yet, which could happen if such partition accepts only null
+ * and hence not handled in the above loop which only looked at non-null
+ * values.
+ */
+ if (null_index != -1)
+ {
+ Assert(null_index >= 0);
+ if ((*mapping)[null_index] == -1)
+ (*mapping)[null_index] = next_index++;
+ boundinfo->null_index = (*mapping)[null_index];
+ }
+
+ /* Set the canonical value for default_index, if any. */
+ if (default_index != -1)
+ {
+ /*
+ * The default partition accepts any value not specified in the lists
+ * of other partitions, hence it should not get mapped index while
+ * assigning those for non-null datums.
+ */
+ Assert(default_index >= 0);
+ Assert((*mapping)[default_index] == -1);
+ (*mapping)[default_index] = next_index++;
+ boundinfo->default_index = (*mapping)[default_index];
+ }
+
+ /* All partition must now have been assigned canonical indexes. */
+ Assert(next_index == list_length(boundspecs));
+ return boundinfo;
+}
+
+/*
+ * create_range_bounds
+ * Create a PartitionBoundInfo for a range partitioned table
+ */
+static PartitionBoundInfo
+create_range_bounds(List *boundspecs, PartitionKey key, int **mapping)
+{
+ PartitionBoundInfo boundinfo;
+ PartitionRangeBound **rbounds = NULL;
+ PartitionRangeBound **all_bounds,
+ *prev;
+ ListCell *cell;
+ int i,
+ k,
+ nparts = list_length(boundspecs);
+ int ndatums = 0;
+ int default_index = -1;
+ int next_index = 0;
+
+ boundinfo = (PartitionBoundInfoData *)
+ palloc0(sizeof(PartitionBoundInfoData));
+ boundinfo->strategy = key->strategy;
+ /* There is no special null-accepting range partition. */
+ boundinfo->null_index = -1;
+ /* Will be set correctly below. */
+ boundinfo->default_index = -1;
+
+ all_bounds = (PartitionRangeBound **)
+ palloc0(2 * nparts * sizeof(PartitionRangeBound *));
+
+ /* Create a unified list of range bounds across all the partitions. */
+ i = ndatums = 0;
+ foreach(cell, boundspecs)
+ {
+ PartitionBoundSpec *spec = castNode(PartitionBoundSpec, lfirst(cell));
+ PartitionRangeBound *lower,
+ *upper;
+
+ if (spec->strategy != PARTITION_STRATEGY_RANGE)
+ elog(ERROR, "invalid strategy in partition bound spec");
+
+ /*
+ * Note the index of the partition bound spec for the default
+ * partition. There's no datum to add to the all_bounds array for
+ * this partition.
+ */
+ if (spec->is_default)
+ {
+ default_index = i++;
+ continue;
+ }
+
+ lower = make_one_partition_rbound(key, i, spec->lowerdatums, true);
+ upper = make_one_partition_rbound(key, i, spec->upperdatums, false);
+ all_bounds[ndatums++] = lower;
+ all_bounds[ndatums++] = upper;
+ i++;
+ }
+
+ Assert(ndatums == nparts * 2 ||
+ (default_index != -1 && ndatums == (nparts - 1) * 2));
+
+ /* Sort all the bounds in ascending order */
+ qsort_arg(all_bounds, ndatums,
+ sizeof(PartitionRangeBound *),
+ qsort_partition_rbound_cmp,
+ (void *) key);
+
+ /* Save distinct bounds from all_bounds into rbounds. */
+ rbounds = (PartitionRangeBound **)
+ palloc(ndatums * sizeof(PartitionRangeBound *));
+ k = 0;
+ prev = NULL;
+ for (i = 0; i < ndatums; i++)
+ {
+ PartitionRangeBound *cur = all_bounds[i];
+ bool is_distinct = false;
+ int j;
+
+ /* Is the current bound distinct from the previous one? */
+ for (j = 0; j < key->partnatts; j++)
+ {
+ Datum cmpval;
+
+ if (prev == NULL || cur->kind[j] != prev->kind[j])
+ {
+ is_distinct = true;
+ break;
+ }
+
+ /*
+ * If the bounds are both MINVALUE or MAXVALUE, stop now and treat
+ * them as equal, since any values after this point must be
+ * ignored.
+ */
+ if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
+ break;
+
+ cmpval = FunctionCall2Coll(&key->partsupfunc[j],
+ key->partcollation[j],
+ cur->datums[j],
+ prev->datums[j]);
+ if (DatumGetInt32(cmpval) != 0)
+ {
+ is_distinct = true;
+ break;
+ }
+ }
+
+ /*
+ * Only if the bound is distinct save it into a temporary array, i.e,
+ * rbounds which is later copied into boundinfo datums array.
+ */
+ if (is_distinct)
+ rbounds[k++] = all_bounds[i];
+
+ prev = cur;
+ }
+
+ /* Update ndatums to hold the count of distinct datums. */
+ ndatums = k;
+
+ /*
+ * Add datums to boundinfo. Canonical indexes are values ranging from 0
+ * to nparts - 1, assigned in that order to each partition's upper bound.
+ * For 'datums' elements that are lower bounds, there is -1 in the
+ * 'indexes' array to signify that no partition exists for the values less
+ * than such a bound and greater than or equal to the previous upper
+ * bound.
+ */
+ boundinfo->ndatums = ndatums;
+ boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+ boundinfo->kind = (PartitionRangeDatumKind **)
+ palloc(ndatums *
+ sizeof(PartitionRangeDatumKind *));
+
+ /*
+ * For range partitioning, an additional value of -1 is stored as the last
+ * element.
+ */
+ boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
+
+ for (i = 0; i < ndatums; i++)
+ {
+ int j;
+
+ boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
+ sizeof(Datum));
+ boundinfo->kind[i] = (PartitionRangeDatumKind *)
+ palloc(key->partnatts *
+ sizeof(PartitionRangeDatumKind));
+ for (j = 0; j < key->partnatts; j++)
+ {
+ if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
+ boundinfo->datums[i][j] =
+ datumCopy(rbounds[i]->datums[j],
+ key->parttypbyval[j],
+ key->parttyplen[j]);
+ boundinfo->kind[i][j] = rbounds[i]->kind[j];
+ }
+
+ /*
+ * There is no mapping for invalid indexes.
+ *
+ * Any lower bounds in the rbounds array have invalid indexes
+ * assigned, because the values between the previous bound (if there
+ * is one) and this (lower) bound are not part of the range of any
+ * existing partition.
+ */
+ if (rbounds[i]->lower)
+ boundinfo->indexes[i] = -1;
+ else
+ {
+ int orig_index = rbounds[i]->index;
+
+ /* If the old index has no mapping, assign one */
+ if ((*mapping)[orig_index] == -1)
+ (*mapping)[orig_index] = next_index++;
+
+ boundinfo->indexes[i] = (*mapping)[orig_index];
+ }
+ }
+
+ /* Set the canonical value for default_index, if any. */
+ if (default_index != -1)
+ {
+ Assert(default_index >= 0 && (*mapping)[default_index] == -1);
+ (*mapping)[default_index] = next_index++;
+ boundinfo->default_index = (*mapping)[default_index];
+ }
+
+ /* The extra -1 element. */
+ Assert(i == ndatums);
+ boundinfo->indexes[i] = -1;
+
+ /* All partition must now have been assigned canonical indexes. */
+ Assert(next_index == nparts);
+ return boundinfo;
+}
+
+/*
* Are two partition bound collections logically equal?
*
* Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
@@ -763,7 +1333,7 @@ get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
* and a flag telling whether the bound is lower or not. Made into a function
* because there are multiple sites that want to use this facility.
*/
-PartitionRangeBound *
+static PartitionRangeBound *
make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
{
PartitionRangeBound *bound;
@@ -819,7 +1389,7 @@ make_one_partition_rbound(PartitionKey key, int index, List *datums, bool lower)
* structure, which only stores the upper bound of a common boundary between
* two contiguous partitions.
*/
-int32
+static int32
partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *datums1, PartitionRangeDatumKind *kind1,
@@ -914,7 +1484,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
*
* Compares modulus first, then remainder if modulus is equal.
*/
-int32
+static int32
partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
{
if (modulus1 < modulus2)
@@ -977,7 +1547,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* *is_equal is set to true if the range bound at the returned index is equal
* to the input range bound
*/
-int
+static int
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
@@ -1102,6 +1672,55 @@ partition_hash_bsearch(PartitionBoundInfo boundinfo,
}
/*
+ * qsort_partition_hbound_cmp
+ *
+ * Hash bounds are sorted by modulus, then by remainder.
+ */
+static int32
+qsort_partition_hbound_cmp(const void *a, const void *b)
+{
+ PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
+ PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
+
+ return partition_hbound_cmp(h1->modulus, h1->remainder,
+ h2->modulus, h2->remainder);
+}
+
+/*
+ * qsort_partition_list_value_cmp
+ *
+ * Compare two list partition bound datums.
+ */
+static int32
+qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
+{
+ Datum val1 = (*(const PartitionListValue **) a)->value,
+ val2 = (*(const PartitionListValue **) b)->value;
+ PartitionKey key = (PartitionKey) arg;
+
+ return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+ key->partcollation[0],
+ val1, val2));
+}
+
+/*
+ * qsort_partition_rbound_cmp
+ *
+ * Used when sorting range bounds across all range partitions.
+ */
+static int32
+qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
+{
+ PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
+ PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
+ PartitionKey key = (PartitionKey) arg;
+
+ return partition_rbound_cmp(key->partnatts, key->partsupfunc,
+ key->partcollation, b1->datums, b1->kind,
+ b1->lower, b2);
+}
+
+/*
* get_partition_bound_num_indexes
*
* Returns the number of the entries in the partition bound indexes array.