summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2018-03-26 10:43:54 -0300
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2018-03-26 10:43:54 -0300
commit555ee77a9668e3f1b03307055b5027e13bf1a715 (patch)
treee024aef2f35c73208a7e5f11822d89d97548c6c1
parent1b89c2188bd38eac68251f16051859996128f2d0 (diff)
Handle INSERT .. ON CONFLICT with partitioned tables
Commit eb7ed3f30634 enabled unique constraints on partitioned tables, but one thing that was not working properly is INSERT/ON CONFLICT. This commit introduces a new node keeps state related to the ON CONFLICT clause per partition, and fills it when that partition is about to be used for tuple routing. Author: Amit Langote, Álvaro Herrera Reviewed-by: Etsuro Fujita, Pavan Deolasee Discussion: https://postgr.es/m/20180228004602.cwdyralmg5ejdqkq@alvherre.pgsql
-rw-r--r--doc/src/sgml/ddl.sgml15
-rw-r--r--doc/src/sgml/ref/insert.sgml8
-rw-r--r--src/backend/catalog/partition.c88
-rw-r--r--src/backend/executor/execMain.c4
-rw-r--r--src/backend/executor/execPartition.c233
-rw-r--r--src/backend/executor/nodeModifyTable.c74
-rw-r--r--src/backend/parser/analyze.c7
-rw-r--r--src/include/catalog/partition.h1
-rw-r--r--src/include/nodes/execnodes.h22
-rw-r--r--src/include/nodes/nodes.h1
-rw-r--r--src/test/regress/expected/insert_conflict.out108
-rw-r--r--src/test/regress/expected/triggers.out33
-rw-r--r--src/test/regress/sql/insert_conflict.sql95
-rw-r--r--src/test/regress/sql/triggers.sql33
14 files changed, 638 insertions, 84 deletions
diff --git a/doc/src/sgml/ddl.sgml b/doc/src/sgml/ddl.sgml
index 3a54ba9d5a7..8805b88d829 100644
--- a/doc/src/sgml/ddl.sgml
+++ b/doc/src/sgml/ddl.sgml
@@ -3324,21 +3324,6 @@ ALTER TABLE measurement ATTACH PARTITION measurement_y2008m02
<listitem>
<para>
- Using the <literal>ON CONFLICT</literal> clause with partitioned tables
- will cause an error if the conflict target is specified (see
- <xref linkend="sql-on-conflict" /> for more details on how the clause
- works). Therefore, it is not possible to specify
- <literal>DO UPDATE</literal> as the alternative action, because
- specifying the conflict target is mandatory in that case. On the other
- hand, specifying <literal>DO NOTHING</literal> as the alternative action
- works fine provided the conflict target is not specified. In that case,
- unique constraints (or exclusion constraints) of the individual leaf
- partitions are considered.
- </para>
- </listitem>
-
- <listitem>
- <para>
When an <command>UPDATE</command> causes a row to move from one
partition to another, there is a chance that another concurrent
<command>UPDATE</command> or <command>DELETE</command> misses this row.
diff --git a/doc/src/sgml/ref/insert.sgml b/doc/src/sgml/ref/insert.sgml
index 134092fa9c5..62e142fd8ef 100644
--- a/doc/src/sgml/ref/insert.sgml
+++ b/doc/src/sgml/ref/insert.sgml
@@ -518,6 +518,14 @@ INSERT INTO <replaceable class="parameter">table_name</replaceable> [ AS <replac
not duplicate each other in terms of attributes constrained by an
arbiter index or constraint.
</para>
+
+ <para>
+ Note that it is currently not supported for the
+ <literal>ON CONFLICT DO UPDATE</literal> clause of an
+ <command>INSERT</command> applied to a partitioned table to update the
+ partition key of a conflicting row such that it requires the row be moved
+ to a new partition.
+ </para>
<tip>
<para>
It is often preferable to use unique index inference rather than
diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c
index 53855f5088b..b00a986432e 100644
--- a/src/backend/catalog/partition.c
+++ b/src/backend/catalog/partition.c
@@ -138,6 +138,10 @@ typedef struct PartitionRangeBound
bool lower; /* this is the lower (vs upper) bound */
} PartitionRangeBound;
+
+static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
+static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
+ List **ancestors);
static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
void *arg);
@@ -1377,6 +1381,7 @@ check_default_allows_bound(Relation parent, Relation default_rel,
/*
* get_partition_parent
+ * Obtain direct parent of given relation
*
* Returns inheritance parent of a partition by scanning pg_inherits
*
@@ -1387,15 +1392,34 @@ check_default_allows_bound(Relation parent, Relation default_rel,
Oid
get_partition_parent(Oid relid)
{
- Form_pg_inherits form;
Relation catalogRelation;
- SysScanDesc scan;
- ScanKeyData key[2];
- HeapTuple tuple;
Oid result;
catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
+ result = get_partition_parent_worker(catalogRelation, relid);
+
+ if (!OidIsValid(result))
+ elog(ERROR, "could not find tuple for parent of relation %u", relid);
+
+ heap_close(catalogRelation, AccessShareLock);
+
+ return result;
+}
+
+/*
+ * get_partition_parent_worker
+ * Scan the pg_inherits relation to return the OID of the parent of the
+ * given relation
+ */
+static Oid
+get_partition_parent_worker(Relation inhRel, Oid relid)
+{
+ SysScanDesc scan;
+ ScanKeyData key[2];
+ Oid result = InvalidOid;
+ HeapTuple tuple;
+
ScanKeyInit(&key[0],
Anum_pg_inherits_inhrelid,
BTEqualStrategyNumber, F_OIDEQ,
@@ -1405,23 +1429,65 @@ get_partition_parent(Oid relid)
BTEqualStrategyNumber, F_INT4EQ,
Int32GetDatum(1));
- scan = systable_beginscan(catalogRelation, InheritsRelidSeqnoIndexId, true,
+ scan = systable_beginscan(inhRel, InheritsRelidSeqnoIndexId, true,
NULL, 2, key);
-
tuple = systable_getnext(scan);
- if (!HeapTupleIsValid(tuple))
- elog(ERROR, "could not find tuple for parent of relation %u", relid);
+ if (HeapTupleIsValid(tuple))
+ {
+ Form_pg_inherits form = (Form_pg_inherits) GETSTRUCT(tuple);
- form = (Form_pg_inherits) GETSTRUCT(tuple);
- result = form->inhparent;
+ result = form->inhparent;
+ }
systable_endscan(scan);
- heap_close(catalogRelation, AccessShareLock);
return result;
}
/*
+ * get_partition_ancestors
+ * Obtain ancestors of given relation
+ *
+ * Returns a list of ancestors of the given relation.
+ *
+ * Note: Because this function assumes that the relation whose OID is passed
+ * as an argument and each ancestor will have precisely one parent, it should
+ * only be called when it is known that the relation is a partition.
+ */
+List *
+get_partition_ancestors(Oid relid)
+{
+ List *result = NIL;
+ Relation inhRel;
+
+ inhRel = heap_open(InheritsRelationId, AccessShareLock);
+
+ get_partition_ancestors_worker(inhRel, relid, &result);
+
+ heap_close(inhRel, AccessShareLock);
+
+ return result;
+}
+
+/*
+ * get_partition_ancestors_worker
+ * recursive worker for get_partition_ancestors
+ */
+static void
+get_partition_ancestors_worker(Relation inhRel, Oid relid, List **ancestors)
+{
+ Oid parentOid;
+
+ /* Recursion ends at the topmost level, ie., when there's no parent */
+ parentOid = get_partition_parent_worker(inhRel, relid);
+ if (parentOid == InvalidOid)
+ return;
+
+ *ancestors = lappend_oid(*ancestors, parentOid);
+ get_partition_ancestors_worker(inhRel, parentOid, ancestors);
+}
+
+/*
* get_qual_from_partbound
* Given a parser node for partition bound, return the list of executable
* expressions as partition constraint
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index f47c691d12b..68f6450ee64 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1347,11 +1347,15 @@ InitResultRelInfo(ResultRelInfo *resultRelInfo,
resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true);
else
resultRelInfo->ri_FdwRoutine = NULL;
+
+ /* The following fields are set later if needed */
resultRelInfo->ri_FdwState = NULL;
resultRelInfo->ri_usesFdwDirectModify = false;
resultRelInfo->ri_ConstraintExprs = NULL;
resultRelInfo->ri_junkFilter = NULL;
resultRelInfo->ri_projectReturning = NULL;
+ resultRelInfo->ri_onConflictArbiterIndexes = NIL;
+ resultRelInfo->ri_onConflict = NULL;
/*
* Partition constraint, which also includes the partition constraint of
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index ce9a4e16cfb..9a131886491 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -15,10 +15,12 @@
#include "postgres.h"
#include "catalog/pg_inherits_fn.h"
+#include "catalog/pg_type.h"
#include "executor/execPartition.h"
#include "executor/executor.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
+#include "nodes/makefuncs.h"
#include "utils/lsyscache.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
@@ -36,6 +38,8 @@ static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
Datum *values,
bool *isnull,
int maxfieldlen);
+static List *adjust_partition_tlist(List *tlist, TupleConversionMap *map);
+
/*
* ExecSetupPartitionTupleRouting - sets up information needed during
@@ -64,6 +68,8 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
int num_update_rri = 0,
update_rri_index = 0;
PartitionTupleRouting *proute;
+ int nparts;
+ ModifyTable *node = mtstate ? (ModifyTable *) mtstate->ps.plan : NULL;
/*
* Get the information about the partition tree after locking all the
@@ -74,20 +80,16 @@ ExecSetupPartitionTupleRouting(ModifyTableState *mtstate, Relation rel)
proute->partition_dispatch_info =
RelationGetPartitionDispatchInfo(rel, &proute->num_dispatch,
&leaf_parts);
- proute->num_partitions = list_length(leaf_parts);
- proute->partitions = (ResultRelInfo **) palloc(proute->num_partitions *
- sizeof(ResultRelInfo *));
+ proute->num_partitions = nparts = list_length(leaf_parts);
+ proute->partitions =
+ (ResultRelInfo **) palloc(nparts * sizeof(ResultRelInfo *));
proute->parent_child_tupconv_maps =
- (TupleConversionMap **) palloc0(proute->num_partitions *
- sizeof(TupleConversionMap *));
- proute->partition_oids = (Oid *) palloc(proute->num_partitions *
- sizeof(Oid));
+ (TupleConversionMap **) palloc0(nparts * sizeof(TupleConversionMap *));
+ proute->partition_oids = (Oid *) palloc(nparts * sizeof(Oid));
/* Set up details specific to the type of tuple routing we are doing. */
- if (mtstate && mtstate->operation == CMD_UPDATE)
+ if (node && node->operation == CMD_UPDATE)
{
- ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
-
update_rri = mtstate->resultRelInfo;
num_update_rri = list_length(node->plans);
proute->subplan_partition_offsets =
@@ -328,7 +330,7 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
*/
oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
- leaf_part_rri = (ResultRelInfo *) palloc0(sizeof(ResultRelInfo));
+ leaf_part_rri = makeNode(ResultRelInfo);
InitResultRelInfo(leaf_part_rri,
partrel,
node ? node->nominalRelation : 1,
@@ -475,9 +477,6 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
&mtstate->ps, RelationGetDescr(partrel));
}
- Assert(proute->partitions[partidx] == NULL);
- proute->partitions[partidx] = leaf_part_rri;
-
/*
* Save a tuple conversion map to convert a tuple routed to this partition
* from the parent's type to the partition's.
@@ -487,6 +486,145 @@ ExecInitPartitionInfo(ModifyTableState *mtstate,
RelationGetDescr(partrel),
gettext_noop("could not convert row type"));
+ /*
+ * If there is an ON CONFLICT clause, initialize state for it.
+ */
+ if (node && node->onConflictAction != ONCONFLICT_NONE)
+ {
+ TupleConversionMap *map = proute->parent_child_tupconv_maps[partidx];
+ int firstVarno = mtstate->resultRelInfo[0].ri_RangeTableIndex;
+ Relation firstResultRel = mtstate->resultRelInfo[0].ri_RelationDesc;
+ TupleDesc partrelDesc = RelationGetDescr(partrel);
+ ExprContext *econtext = mtstate->ps.ps_ExprContext;
+ ListCell *lc;
+ List *arbiterIndexes = NIL;
+
+ /*
+ * If there is a list of arbiter indexes, map it to a list of indexes
+ * in the partition. We do that by scanning the partition's index
+ * list and searching for ancestry relationships to each index in the
+ * ancestor table.
+ */
+ if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) > 0)
+ {
+ List *childIdxs;
+
+ childIdxs = RelationGetIndexList(leaf_part_rri->ri_RelationDesc);
+
+ foreach(lc, childIdxs)
+ {
+ Oid childIdx = lfirst_oid(lc);
+ List *ancestors;
+ ListCell *lc2;
+
+ ancestors = get_partition_ancestors(childIdx);
+ foreach(lc2, resultRelInfo->ri_onConflictArbiterIndexes)
+ {
+ if (list_member_oid(ancestors, lfirst_oid(lc2)))
+ arbiterIndexes = lappend_oid(arbiterIndexes, childIdx);
+ }
+ list_free(ancestors);
+ }
+ }
+
+ /*
+ * If the resulting lists are of inequal length, something is wrong.
+ * (This shouldn't happen, since arbiter index selection should not
+ * pick up an invalid index.)
+ */
+ if (list_length(resultRelInfo->ri_onConflictArbiterIndexes) !=
+ list_length(arbiterIndexes))
+ elog(ERROR, "invalid arbiter index list");
+ leaf_part_rri->ri_onConflictArbiterIndexes = arbiterIndexes;
+
+ /*
+ * In the DO UPDATE case, we have some more state to initialize.
+ */
+ if (node->onConflictAction == ONCONFLICT_UPDATE)
+ {
+ Assert(node->onConflictSet != NIL);
+ Assert(resultRelInfo->ri_onConflict != NULL);
+
+ /*
+ * If the partition's tuple descriptor matches exactly the root
+ * parent (the common case), we can simply re-use the parent's ON
+ * CONFLICT SET state, skipping a bunch of work. Otherwise, we
+ * need to create state specific to this partition.
+ */
+ if (map == NULL)
+ leaf_part_rri->ri_onConflict = resultRelInfo->ri_onConflict;
+ else
+ {
+ List *onconflset;
+ TupleDesc tupDesc;
+ bool found_whole_row;
+
+ leaf_part_rri->ri_onConflict = makeNode(OnConflictSetState);
+
+ /*
+ * Translate expressions in onConflictSet to account for
+ * different attribute numbers. For that, map partition
+ * varattnos twice: first to catch the EXCLUDED
+ * pseudo-relation (INNER_VAR), and second to handle the main
+ * target relation (firstVarno).
+ */
+ onconflset = (List *) copyObject((Node *) node->onConflictSet);
+ onconflset =
+ map_partition_varattnos(onconflset, INNER_VAR, partrel,
+ firstResultRel, &found_whole_row);
+ Assert(!found_whole_row);
+ onconflset =
+ map_partition_varattnos(onconflset, firstVarno, partrel,
+ firstResultRel, &found_whole_row);
+ Assert(!found_whole_row);
+
+ /* Finally, adjust this tlist to match the partition. */
+ onconflset = adjust_partition_tlist(onconflset, map);
+
+ /*
+ * Build UPDATE SET's projection info. The user of this
+ * projection is responsible for setting the slot's tupdesc!
+ * We set aside a tupdesc that's good for the common case of a
+ * partition that's tupdesc-equal to the partitioned table;
+ * partitions of different tupdescs must generate their own.
+ */
+ tupDesc = ExecTypeFromTL(onconflset, partrelDesc->tdhasoid);
+ ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);
+ leaf_part_rri->ri_onConflict->oc_ProjInfo =
+ ExecBuildProjectionInfo(onconflset, econtext,
+ mtstate->mt_conflproj,
+ &mtstate->ps, partrelDesc);
+ leaf_part_rri->ri_onConflict->oc_ProjTupdesc = tupDesc;
+
+ /*
+ * If there is a WHERE clause, initialize state where it will
+ * be evaluated, mapping the attribute numbers appropriately.
+ * As with onConflictSet, we need to map partition varattnos
+ * to the partition's tupdesc.
+ */
+ if (node->onConflictWhere)
+ {
+ List *clause;
+
+ clause = copyObject((List *) node->onConflictWhere);
+ clause = map_partition_varattnos(clause, INNER_VAR,
+ partrel, firstResultRel,
+ &found_whole_row);
+ Assert(!found_whole_row);
+ clause = map_partition_varattnos(clause, firstVarno,
+ partrel, firstResultRel,
+ &found_whole_row);
+ Assert(!found_whole_row);
+ leaf_part_rri->ri_onConflict->oc_WhereClause =
+ ExecInitQual((List *) clause, &mtstate->ps);
+ }
+ }
+ }
+ }
+
+ Assert(proute->partitions[partidx] == NULL);
+ proute->partitions[partidx] = leaf_part_rri;
+
MemoryContextSwitchTo(oldContext);
return leaf_part_rri;
@@ -946,3 +1084,70 @@ ExecBuildSlotPartitionKeyDescription(Relation rel,
return buf.data;
}
+
+/*
+ * adjust_partition_tlist
+ * Adjust the targetlist entries for a given partition to account for
+ * attribute differences between parent and the partition
+ *
+ * The expressions have already been fixed, but here we fix the list to make
+ * target resnos match the partition's attribute numbers. This results in a
+ * copy of the original target list in which the entries appear in resno
+ * order, including both the existing entries (that may have their resno
+ * changed in-place) and the newly added entries for columns that don't exist
+ * in the parent.
+ *
+ * Scribbles on the input tlist, so callers must make sure to make a copy
+ * before passing it to us.
+ */
+static List *
+adjust_partition_tlist(List *tlist, TupleConversionMap *map)
+{
+ List *new_tlist = NIL;
+ TupleDesc tupdesc = map->outdesc;
+ AttrNumber *attrMap = map->attrMap;
+ AttrNumber attrno;
+
+ for (attrno = 1; attrno <= tupdesc->natts; attrno++)
+ {
+ Form_pg_attribute att_tup = TupleDescAttr(tupdesc, attrno - 1);
+ TargetEntry *tle;
+
+ if (attrMap[attrno - 1] != InvalidAttrNumber)
+ {
+ Assert(!att_tup->attisdropped);
+
+ /*
+ * Use the corresponding entry from the parent's tlist, adjusting
+ * the resno the match the partition's attno.
+ */
+ tle = (TargetEntry *) list_nth(tlist, attrMap[attrno - 1] - 1);
+ tle->resno = attrno;
+ }
+ else
+ {
+ Const *expr;
+
+ /*
+ * For a dropped attribute in the partition, generate a dummy
+ * entry with resno matching the partition's attno.
+ */
+ Assert(att_tup->attisdropped);
+ expr = makeConst(INT4OID,
+ -1,
+ InvalidOid,
+ sizeof(int32),
+ (Datum) 0,
+ true, /* isnull */
+ true /* byval */ );
+ tle = makeTargetEntry((Expr *) expr,
+ attrno,
+ pstrdup(NameStr(att_tup->attname)),
+ false);
+ }
+
+ new_tlist = lappend(new_tlist, tle);
+ }
+
+ return new_tlist;
+}
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 4fa2d7265f0..1b09868ff8e 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -422,7 +422,7 @@ ExecInsert(ModifyTableState *mtstate,
bool specConflict;
List *arbiterIndexes;
- arbiterIndexes = node->arbiterIndexes;
+ arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
/*
* Do a non-conclusive check for conflicts first.
@@ -1056,6 +1056,18 @@ lreplace:;
TupleConversionMap *tupconv_map;
/*
+ * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
+ * original row to migrate to a different partition. Maybe this
+ * can be implemented some day, but it seems a fringe feature with
+ * little redeeming value.
+ */
+ if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("invalid ON UPDATE specification"),
+ errdetail("The result tuple would appear in a different partition than the original tuple.")));
+
+ /*
* When an UPDATE is run on a leaf partition, we will not have
* partition tuple routing set up. In that case, fail with
* partition constraint violation error.
@@ -1313,7 +1325,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
{
ExprContext *econtext = mtstate->ps.ps_ExprContext;
Relation relation = resultRelInfo->ri_RelationDesc;
- ExprState *onConflictSetWhere = resultRelInfo->ri_onConflictSetWhere;
+ ExprState *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
HeapTupleData tuple;
HeapUpdateFailureData hufd;
LockTupleMode lockmode;
@@ -1462,7 +1474,7 @@ ExecOnConflictUpdate(ModifyTableState *mtstate,
}
/* Project the new tuple version */
- ExecProject(resultRelInfo->ri_onConflictSetProj);
+ ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
/*
* Note that it is possible that the target tuple has been modified in
@@ -1639,6 +1651,7 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
ResultRelInfo *targetRelInfo,
TupleTableSlot *slot)
{
+ ModifyTable *node;
int partidx;
ResultRelInfo *partrel;
HeapTuple tuple;
@@ -1720,6 +1733,19 @@ ExecPrepareTupleRouting(ModifyTableState *mtstate,
proute->partition_tuple_slot,
&slot);
+ /* Initialize information needed to handle ON CONFLICT DO UPDATE. */
+ Assert(mtstate != NULL);
+ node = (ModifyTable *) mtstate->ps.plan;
+ if (node->onConflictAction == ONCONFLICT_UPDATE)
+ {
+ Assert(mtstate->mt_existing != NULL);
+ ExecSetSlotDescriptor(mtstate->mt_existing,
+ RelationGetDescr(partrel->ri_RelationDesc));
+ Assert(mtstate->mt_conflproj != NULL);
+ ExecSetSlotDescriptor(mtstate->mt_conflproj,
+ partrel->ri_onConflict->oc_ProjTupdesc);
+ }
+
return slot;
}
@@ -2347,11 +2373,15 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
mtstate->ps.ps_ExprContext = NULL;
}
+ /* Set the list of arbiter indexes if needed for ON CONFLICT */
+ resultRelInfo = mtstate->resultRelInfo;
+ if (node->onConflictAction != ONCONFLICT_NONE)
+ resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
+
/*
* If needed, Initialize target list, projection and qual for ON CONFLICT
* DO UPDATE.
*/
- resultRelInfo = mtstate->resultRelInfo;
if (node->onConflictAction == ONCONFLICT_UPDATE)
{
ExprContext *econtext;
@@ -2368,34 +2398,54 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
econtext = mtstate->ps.ps_ExprContext;
relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
- /* initialize slot for the existing tuple */
+ /*
+ * Initialize slot for the existing tuple. If we'll be performing
+ * tuple routing, the tuple descriptor to use for this will be
+ * determined based on which relation the update is actually applied
+ * to, so we don't set its tuple descriptor here.
+ */
mtstate->mt_existing =
- ExecInitExtraTupleSlot(mtstate->ps.state, relationDesc);
+ ExecInitExtraTupleSlot(mtstate->ps.state,
+ mtstate->mt_partition_tuple_routing ?
+ NULL : relationDesc);
/* carried forward solely for the benefit of explain */
mtstate->mt_excludedtlist = node->exclRelTlist;
- /* create target slot for UPDATE SET projection */
+ /* create state for DO UPDATE SET operation */
+ resultRelInfo->ri_onConflict = makeNode(OnConflictSetState);
+
+ /*
+ * Create the tuple slot for the UPDATE SET projection.
+ *
+ * Just like mt_existing above, we leave it without a tuple descriptor
+ * in the case of partitioning tuple routing, so that it can be
+ * changed by ExecPrepareTupleRouting. In that case, we still save
+ * the tupdesc in the parent's state: it can be reused by partitions
+ * with an identical descriptor to the parent.
+ */
tupDesc = ExecTypeFromTL((List *) node->onConflictSet,
relationDesc->tdhasoid);
mtstate->mt_conflproj =
- ExecInitExtraTupleSlot(mtstate->ps.state, tupDesc);
+ ExecInitExtraTupleSlot(mtstate->ps.state,
+ mtstate->mt_partition_tuple_routing ?
+ NULL : tupDesc);
+ resultRelInfo->ri_onConflict->oc_ProjTupdesc = tupDesc;
/* build UPDATE SET projection state */
- resultRelInfo->ri_onConflictSetProj =
+ resultRelInfo->ri_onConflict->oc_ProjInfo =
ExecBuildProjectionInfo(node->onConflictSet, econtext,
mtstate->mt_conflproj, &mtstate->ps,
relationDesc);
- /* build DO UPDATE WHERE clause expression */
+ /* initialize state to evaluate the WHERE clause, if any */
if (node->onConflictWhere)
{
ExprState *qualexpr;
qualexpr = ExecInitQual((List *) node->onConflictWhere,
&mtstate->ps);
-
- resultRelInfo->ri_onConflictSetWhere = qualexpr;
+ resultRelInfo->ri_onConflict->oc_WhereClause = qualexpr;
}
}
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index cf1a34e41ae..a4b5aaef44f 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -1026,13 +1026,6 @@ transformOnConflictClause(ParseState *pstate,
TargetEntry *te;
int attno;
- if (targetrel->rd_partdesc)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("%s cannot be applied to partitioned table \"%s\"",
- "ON CONFLICT DO UPDATE",
- RelationGetRelationName(targetrel))));
-
/*
* All INSERT expressions have been parsed, get ready for potentially
* existing SET statements that need to be processed like an UPDATE.
diff --git a/src/include/catalog/partition.h b/src/include/catalog/partition.h
index 2faf0ca26e9..cd15faa7a14 100644
--- a/src/include/catalog/partition.h
+++ b/src/include/catalog/partition.h
@@ -52,6 +52,7 @@ extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src,
extern void check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec);
extern Oid get_partition_parent(Oid relid);
+extern List *get_partition_ancestors(Oid relid);
extern List *get_qual_from_partbound(Relation rel, Relation parent,
PartitionBoundSpec *spec);
extern List *map_partition_varattnos(List *expr, int fromrel_varno,
diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h
index bf2616a95ed..2c2d2823c05 100644
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -363,6 +363,20 @@ typedef struct JunkFilter
} JunkFilter;
/*
+ * OnConflictSetState
+ *
+ * Executor state of an ON CONFLICT DO UPDATE operation.
+ */
+typedef struct OnConflictSetState
+{
+ NodeTag type;
+
+ ProjectionInfo *oc_ProjInfo; /* for ON CONFLICT DO UPDATE SET */
+ TupleDesc oc_ProjTupdesc; /* TupleDesc for the above projection */
+ ExprState *oc_WhereClause; /* state for the WHERE clause */
+} OnConflictSetState;
+
+/*
* ResultRelInfo
*
* Whenever we update an existing relation, we have to update indexes on the
@@ -424,11 +438,11 @@ typedef struct ResultRelInfo
/* for computing a RETURNING list */
ProjectionInfo *ri_projectReturning;
- /* for computing ON CONFLICT DO UPDATE SET */
- ProjectionInfo *ri_onConflictSetProj;
+ /* list of arbiter indexes to use to check conflicts */
+ List *ri_onConflictArbiterIndexes;
- /* list of ON CONFLICT DO UPDATE exprs (qual) */
- ExprState *ri_onConflictSetWhere;
+ /* ON CONFLICT evaluation state */
+ OnConflictSetState *ri_onConflict;
/* partition check expression */
List *ri_PartitionCheck;
diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h
index 74b094a9c39..443de227041 100644
--- a/src/include/nodes/nodes.h
+++ b/src/include/nodes/nodes.h
@@ -34,6 +34,7 @@ typedef enum NodeTag
T_ExprContext,
T_ProjectionInfo,
T_JunkFilter,
+ T_OnConflictSetState,
T_ResultRelInfo,
T_EState,
T_TupleTableSlot,
diff --git a/src/test/regress/expected/insert_conflict.out b/src/test/regress/expected/insert_conflict.out
index 2650faedeec..2d7061fa1b0 100644
--- a/src/test/regress/expected/insert_conflict.out
+++ b/src/test/regress/expected/insert_conflict.out
@@ -786,16 +786,102 @@ select * from selfconflict;
(3 rows)
drop table selfconflict;
--- check that the following works:
--- insert into partitioned_table on conflict do nothing
-create table parted_conflict_test (a int, b char) partition by list (a);
-create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1);
+-- check ON CONFLICT handling with partitioned tables
+create table parted_conflict_test (a int unique, b char) partition by list (a);
+create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1, 2);
+-- no indexes required here
insert into parted_conflict_test values (1, 'a') on conflict do nothing;
-insert into parted_conflict_test values (1, 'a') on conflict do nothing;
--- however, on conflict do update is not supported yet
-insert into parted_conflict_test values (1) on conflict (b) do update set a = excluded.a;
-ERROR: ON CONFLICT DO UPDATE cannot be applied to partitioned table "parted_conflict_test"
--- but it works OK if we target the partition directly
-insert into parted_conflict_test_1 values (1) on conflict (b) do
-update set a = excluded.a;
+-- index on a required, which does exist in parent
+insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing;
+insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b;
+-- targeting partition directly will work
+insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing;
+insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b;
+-- index on b required, which doesn't exist in parent
+insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+-- targeting partition directly will work
+insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a;
+-- should see (2, 'b')
+select * from parted_conflict_test order by a;
+ a | b
+---+---
+ 2 | b
+(1 row)
+
+-- now check that DO UPDATE works correctly for target partition with
+-- different attribute numbers
+create table parted_conflict_test_2 (b char, a int unique);
+alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3);
+truncate parted_conflict_test;
+insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b;
+-- should see (3, 'b')
+select * from parted_conflict_test order by a;
+ a | b
+---+---
+ 3 | b
+(1 row)
+
+-- case where parent will have a dropped column, but the partition won't
+alter table parted_conflict_test drop b, add b char;
+create table parted_conflict_test_3 partition of parted_conflict_test for values in (4);
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+-- should see (4, 'b')
+select * from parted_conflict_test order by a;
+ a | b
+---+---
+ 4 | b
+(1 row)
+
+-- case with multi-level partitioning
+create table parted_conflict_test_4 partition of parted_conflict_test for values in (5) partition by list (a);
+create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for values in (5);
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+-- should see (5, 'b')
+select * from parted_conflict_test order by a;
+ a | b
+---+---
+ 5 | b
+(1 row)
+
+-- test with multiple rows
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (1, 'a'), (2, 'a'), (4, 'a') on conflict (a) do update set b = excluded.b where excluded.b = 'b';
+insert into parted_conflict_test (a, b) values (1, 'b'), (2, 'c'), (4, 'b') on conflict (a) do update set b = excluded.b where excluded.b = 'b';
+-- should see (1, 'b'), (2, 'a'), (4, 'b')
+select * from parted_conflict_test order by a;
+ a | b
+---+---
+ 1 | b
+ 2 | a
+ 4 | b
+(3 rows)
+
drop table parted_conflict_test;
+-- test behavior of inserting a conflicting tuple into an intermediate
+-- partitioning level
+create table parted_conflict (a int primary key, b text) partition by range (a);
+create table parted_conflict_1 partition of parted_conflict for values from (0) to (1000) partition by range (a);
+create table parted_conflict_1_1 partition of parted_conflict_1 for values from (0) to (500);
+insert into parted_conflict values (40, 'forty');
+insert into parted_conflict_1 values (40, 'cuarenta')
+ on conflict (a) do update set b = excluded.b;
+drop table parted_conflict;
+-- same thing, but this time try to use an index that's created not in the
+-- partition
+create table parted_conflict (a int, b text) partition by range (a);
+create table parted_conflict_1 partition of parted_conflict for values from (0) to (1000) partition by range (a);
+create unique index on only parted_conflict_1 (a);
+create unique index on only parted_conflict (a);
+alter index parted_conflict_a_idx attach partition parted_conflict_1_a_idx;
+create table parted_conflict_1_1 partition of parted_conflict_1 for values from (0) to (500);
+insert into parted_conflict values (40, 'forty');
+insert into parted_conflict_1 values (40, 'cuarenta')
+ on conflict (a) do update set b = excluded.b;
+ERROR: there is no unique or exclusion constraint matching the ON CONFLICT specification
+drop table parted_conflict;
diff --git a/src/test/regress/expected/triggers.out b/src/test/regress/expected/triggers.out
index 53e7ae41ba7..f534d0db18b 100644
--- a/src/test/regress/expected/triggers.out
+++ b/src/test/regress/expected/triggers.out
@@ -2624,6 +2624,39 @@ insert into my_table values (3, 'CCC'), (4, 'DDD')
NOTICE: trigger = my_table_update_trig, old table = (3,CCC), (4,DDD), new table = (3,CCC:CCC), (4,DDD:DDD)
NOTICE: trigger = my_table_insert_trig, new table = <NULL>
--
+-- now using a partitioned table
+--
+create table iocdu_tt_parted (a int primary key, b text) partition by list (a);
+create table iocdu_tt_parted1 partition of iocdu_tt_parted for values in (1);
+create table iocdu_tt_parted2 partition of iocdu_tt_parted for values in (2);
+create table iocdu_tt_parted3 partition of iocdu_tt_parted for values in (3);
+create table iocdu_tt_parted4 partition of iocdu_tt_parted for values in (4);
+create trigger iocdu_tt_parted_insert_trig
+ after insert on iocdu_tt_parted referencing new table as new_table
+ for each statement execute procedure dump_insert();
+create trigger iocdu_tt_parted_update_trig
+ after update on iocdu_tt_parted referencing old table as old_table new table as new_table
+ for each statement execute procedure dump_update();
+-- inserts only
+insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB')
+ on conflict (a) do
+ update set b = iocdu_tt_parted.b || ':' || excluded.b;
+NOTICE: trigger = iocdu_tt_parted_update_trig, old table = <NULL>, new table = <NULL>
+NOTICE: trigger = iocdu_tt_parted_insert_trig, new table = (1,AAA), (2,BBB)
+-- mixture of inserts and updates
+insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB'), (3, 'CCC'), (4, 'DDD')
+ on conflict (a) do
+ update set b = iocdu_tt_parted.b || ':' || excluded.b;
+NOTICE: trigger = iocdu_tt_parted_update_trig, old table = (1,AAA), (2,BBB), new table = (1,AAA:AAA), (2,BBB:BBB)
+NOTICE: trigger = iocdu_tt_parted_insert_trig, new table = (3,CCC), (4,DDD)
+-- updates only
+insert into iocdu_tt_parted values (3, 'CCC'), (4, 'DDD')
+ on conflict (a) do
+ update set b = iocdu_tt_parted.b || ':' || excluded.b;
+NOTICE: trigger = iocdu_tt_parted_update_trig, old table = (3,CCC), (4,DDD), new table = (3,CCC:CCC), (4,DDD:DDD)
+NOTICE: trigger = iocdu_tt_parted_insert_trig, new table = <NULL>
+drop table iocdu_tt_parted;
+--
-- Verify that you can't create a trigger with transition tables for
-- more than one event.
--
diff --git a/src/test/regress/sql/insert_conflict.sql b/src/test/regress/sql/insert_conflict.sql
index 32c647e3f88..6c50fd61eb6 100644
--- a/src/test/regress/sql/insert_conflict.sql
+++ b/src/test/regress/sql/insert_conflict.sql
@@ -472,15 +472,90 @@ select * from selfconflict;
drop table selfconflict;
--- check that the following works:
--- insert into partitioned_table on conflict do nothing
-create table parted_conflict_test (a int, b char) partition by list (a);
-create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1);
-insert into parted_conflict_test values (1, 'a') on conflict do nothing;
+-- check ON CONFLICT handling with partitioned tables
+create table parted_conflict_test (a int unique, b char) partition by list (a);
+create table parted_conflict_test_1 partition of parted_conflict_test (b unique) for values in (1, 2);
+
+-- no indexes required here
insert into parted_conflict_test values (1, 'a') on conflict do nothing;
--- however, on conflict do update is not supported yet
-insert into parted_conflict_test values (1) on conflict (b) do update set a = excluded.a;
--- but it works OK if we target the partition directly
-insert into parted_conflict_test_1 values (1) on conflict (b) do
-update set a = excluded.a;
+
+-- index on a required, which does exist in parent
+insert into parted_conflict_test values (1, 'a') on conflict (a) do nothing;
+insert into parted_conflict_test values (1, 'a') on conflict (a) do update set b = excluded.b;
+
+-- targeting partition directly will work
+insert into parted_conflict_test_1 values (1, 'a') on conflict (a) do nothing;
+insert into parted_conflict_test_1 values (1, 'b') on conflict (a) do update set b = excluded.b;
+
+-- index on b required, which doesn't exist in parent
+insert into parted_conflict_test values (2, 'b') on conflict (b) do update set a = excluded.a;
+
+-- targeting partition directly will work
+insert into parted_conflict_test_1 values (2, 'b') on conflict (b) do update set a = excluded.a;
+
+-- should see (2, 'b')
+select * from parted_conflict_test order by a;
+
+-- now check that DO UPDATE works correctly for target partition with
+-- different attribute numbers
+create table parted_conflict_test_2 (b char, a int unique);
+alter table parted_conflict_test attach partition parted_conflict_test_2 for values in (3);
+truncate parted_conflict_test;
+insert into parted_conflict_test values (3, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test values (3, 'b') on conflict (a) do update set b = excluded.b;
+
+-- should see (3, 'b')
+select * from parted_conflict_test order by a;
+
+-- case where parent will have a dropped column, but the partition won't
+alter table parted_conflict_test drop b, add b char;
+create table parted_conflict_test_3 partition of parted_conflict_test for values in (4);
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (4, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test (a, b) values (4, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+
+-- should see (4, 'b')
+select * from parted_conflict_test order by a;
+
+-- case with multi-level partitioning
+create table parted_conflict_test_4 partition of parted_conflict_test for values in (5) partition by list (a);
+create table parted_conflict_test_4_1 partition of parted_conflict_test_4 for values in (5);
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (5, 'a') on conflict (a) do update set b = excluded.b;
+insert into parted_conflict_test (a, b) values (5, 'b') on conflict (a) do update set b = excluded.b where parted_conflict_test.b = 'a';
+
+-- should see (5, 'b')
+select * from parted_conflict_test order by a;
+
+-- test with multiple rows
+truncate parted_conflict_test;
+insert into parted_conflict_test (a, b) values (1, 'a'), (2, 'a'), (4, 'a') on conflict (a) do update set b = excluded.b where excluded.b = 'b';
+insert into parted_conflict_test (a, b) values (1, 'b'), (2, 'c'), (4, 'b') on conflict (a) do update set b = excluded.b where excluded.b = 'b';
+
+-- should see (1, 'b'), (2, 'a'), (4, 'b')
+select * from parted_conflict_test order by a;
+
drop table parted_conflict_test;
+
+-- test behavior of inserting a conflicting tuple into an intermediate
+-- partitioning level
+create table parted_conflict (a int primary key, b text) partition by range (a);
+create table parted_conflict_1 partition of parted_conflict for values from (0) to (1000) partition by range (a);
+create table parted_conflict_1_1 partition of parted_conflict_1 for values from (0) to (500);
+insert into parted_conflict values (40, 'forty');
+insert into parted_conflict_1 values (40, 'cuarenta')
+ on conflict (a) do update set b = excluded.b;
+drop table parted_conflict;
+
+-- same thing, but this time try to use an index that's created not in the
+-- partition
+create table parted_conflict (a int, b text) partition by range (a);
+create table parted_conflict_1 partition of parted_conflict for values from (0) to (1000) partition by range (a);
+create unique index on only parted_conflict_1 (a);
+create unique index on only parted_conflict (a);
+alter index parted_conflict_a_idx attach partition parted_conflict_1_a_idx;
+create table parted_conflict_1_1 partition of parted_conflict_1 for values from (0) to (500);
+insert into parted_conflict values (40, 'forty');
+insert into parted_conflict_1 values (40, 'cuarenta')
+ on conflict (a) do update set b = excluded.b;
+drop table parted_conflict;
diff --git a/src/test/regress/sql/triggers.sql b/src/test/regress/sql/triggers.sql
index 8be893bd1e3..9d3e0ef7079 100644
--- a/src/test/regress/sql/triggers.sql
+++ b/src/test/regress/sql/triggers.sql
@@ -1983,6 +1983,39 @@ insert into my_table values (3, 'CCC'), (4, 'DDD')
update set b = my_table.b || ':' || excluded.b;
--
+-- now using a partitioned table
+--
+
+create table iocdu_tt_parted (a int primary key, b text) partition by list (a);
+create table iocdu_tt_parted1 partition of iocdu_tt_parted for values in (1);
+create table iocdu_tt_parted2 partition of iocdu_tt_parted for values in (2);
+create table iocdu_tt_parted3 partition of iocdu_tt_parted for values in (3);
+create table iocdu_tt_parted4 partition of iocdu_tt_parted for values in (4);
+create trigger iocdu_tt_parted_insert_trig
+ after insert on iocdu_tt_parted referencing new table as new_table
+ for each statement execute procedure dump_insert();
+create trigger iocdu_tt_parted_update_trig
+ after update on iocdu_tt_parted referencing old table as old_table new table as new_table
+ for each statement execute procedure dump_update();
+
+-- inserts only
+insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB')
+ on conflict (a) do
+ update set b = iocdu_tt_parted.b || ':' || excluded.b;
+
+-- mixture of inserts and updates
+insert into iocdu_tt_parted values (1, 'AAA'), (2, 'BBB'), (3, 'CCC'), (4, 'DDD')
+ on conflict (a) do
+ update set b = iocdu_tt_parted.b || ':' || excluded.b;
+
+-- updates only
+insert into iocdu_tt_parted values (3, 'CCC'), (4, 'DDD')
+ on conflict (a) do
+ update set b = iocdu_tt_parted.b || ':' || excluded.b;
+
+drop table iocdu_tt_parted;
+
+--
-- Verify that you can't create a trigger with transition tables for
-- more than one event.
--