summaryrefslogtreecommitdiff
path: root/src/backend/commands/copyto.c
diff options
context:
space:
mode:
author: Masahiko Sawada <msawada@postgresql.org> 2025-10-20 10:38:52 -0700
committer: Masahiko Sawada <msawada@postgresql.org> 2025-10-20 10:38:52 -0700
commit: 4bea91f21f61d01bd40a4191a4a8c82d0959fffe (patch)
tree: 818ad75c459535a93f9317b1870802416bd4ae69 /src/backend/commands/copyto.c
parent: d74cfe3263fa0a35cb962570697f422775cd12d6 (diff)
Support COPY TO for partitioned tables.
Previously, COPY TO command didn't support directly specifying partitioned tables so users had to use COPY (SELECT ...) TO variant.

This commit adds direct COPY TO support for partitioned tables, improving both usability and performance. Performance tests show it's faster than the COPY (SELECT ...) TO variant as it avoids the overheads of query processing and sending results to the COPY TO command.

When used with partitioned tables, COPY TO copies the same rows as SELECT * FROM table. Row-level security policies of the partitioned table are applied in the same way as when executing COPY TO on a plain table.

Author: jian he <jian.universality@gmail.com>
Reviewed-by: vignesh C <vignesh21@gmail.com>
Reviewed-by: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Melih Mutlu <m.melihmutlu@gmail.com>
Reviewed-by: Kirill Reshke <reshkekirill@gmail.com>
Reviewed-by: Atsushi Torikoshi <torikoshia@oss.nttdata.com>
Reviewed-by: Álvaro Herrera <alvherre@kurilemu.de>
Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/CACJufxEZt%2BG19Ors3bQUq-42-61__C%3Dy5k2wk%3DsHEFRusu7%3DiQ%40mail.gmail.com
Diffstat (limited to 'src/backend/commands/copyto.c')
-rw-r--r-- src/backend/commands/copyto.c 153
1 files changed, 123 insertions, 30 deletions
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e5781155cdf..a1919c6db43 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -18,7 +18,9 @@
#include <unistd.h>
#include <sys/stat.h>
+#include "access/table.h"
#include "access/tableam.h"
+#include "catalog/pg_inherits.h"
#include "commands/copyapi.h"
#include "commands/progress.h"
#include "executor/execdesc.h"
@@ -86,6 +88,7 @@ typedef struct CopyToStateData
CopyFormatOptions opts;
Node *whereClause; /* WHERE condition (or NULL) */
+ List *partitions; /* OID list of partitions to copy data from */
/*
* Working state
@@ -116,6 +119,8 @@ static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
static void CopyAttributeOutText(CopyToState cstate, const char *string);
static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
bool use_quote);
+static void CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel,
+ uint64 *processed);
/* built-in format-specific routines */
static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
@@ -602,6 +607,10 @@ EndCopy(CopyToState cstate)
pgstat_progress_end_command();
MemoryContextDelete(cstate->copycontext);
+
+ if (cstate->partitions)
+ list_free(cstate->partitions);
+
pfree(cstate);
}
@@ -643,6 +652,7 @@ BeginCopyTo(ParseState *pstate,
PROGRESS_COPY_COMMAND_TO,
0
};
+ List *children = NIL;
if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
{
@@ -673,11 +683,34 @@ BeginCopyTo(ParseState *pstate,
errmsg("cannot copy from sequence \"%s\"",
RelationGetRelationName(rel))));
else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
- ereport(ERROR,
- (errcode(ERRCODE_WRONG_OBJECT_TYPE),
- errmsg("cannot copy from partitioned table \"%s\"",
- RelationGetRelationName(rel)),
- errhint("Try the COPY (SELECT ...) TO variant.")));
+ {
+ /*
+ * Collect the OIDs of the relations that contain data, so that
+ * DoCopyTo can later copy the data from them.
+ */
+ children = find_all_inheritors(RelationGetRelid(rel), AccessShareLock, NULL);
+
+ foreach_oid(child, children)
+ {
+ char relkind = get_rel_relkind(child);
+
+ if (relkind == RELKIND_FOREIGN_TABLE)
+ {
+ char *relation_name = get_rel_name(child);
+
+ ereport(ERROR,
+ errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("cannot copy from foreign table \"%s\"", relation_name),
+ errdetail("Partition \"%s\" is a foreign table in partitioned table \"%s\"",
+ relation_name, RelationGetRelationName(rel)),
+ errhint("Try the COPY (SELECT ...) TO variant."));
+ }
+
+ /* Exclude tables with no data */
+ if (RELKIND_HAS_PARTITIONS(relkind))
+ children = foreach_delete_current(children, child);
+ }
+ }
else
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
@@ -713,6 +746,7 @@ BeginCopyTo(ParseState *pstate,
cstate->rel = rel;
tupDesc = RelationGetDescr(cstate->rel);
+ cstate->partitions = children;
}
else
{
@@ -722,6 +756,7 @@ BeginCopyTo(ParseState *pstate,
DestReceiver *dest;
cstate->rel = NULL;
+ cstate->partitions = NIL;
/*
* Run parse analysis and rewrite. Note this also acquires sufficient
@@ -1030,7 +1065,7 @@ DoCopyTo(CopyToState cstate)
TupleDesc tupDesc;
int num_phys_attrs;
ListCell *cur;
- uint64 processed;
+ uint64 processed = 0;
if (fe_copy)
SendCopyBegin(cstate);
@@ -1070,33 +1105,24 @@ DoCopyTo(CopyToState cstate)
if (cstate->rel)
{
- TupleTableSlot *slot;
- TableScanDesc scandesc;
-
- scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
- slot = table_slot_create(cstate->rel, NULL);
-
- processed = 0;
- while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
+ /*
+ * If COPY TO source table is a partitioned table, then open each
+ * partition and process each individual partition.
+ */
+ if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
{
- CHECK_FOR_INTERRUPTS();
-
- /* Deconstruct the tuple ... */
- slot_getallattrs(slot);
-
- /* Format and send the data */
- CopyOneRowTo(cstate, slot);
+ foreach_oid(child, cstate->partitions)
+ {
+ Relation scan_rel;
- /*
- * Increment the number of processed tuples, and report the
- * progress.
- */
- pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
- ++processed);
+ /* We already got the lock in BeginCopyTo */
+ scan_rel = table_open(child, NoLock);
+ CopyRelationTo(cstate, scan_rel, cstate->rel, &processed);
+ table_close(scan_rel, NoLock);
+ }
}
-
- ExecDropSingleTupleTableSlot(slot);
- table_endscan(scandesc);
+ else
+ CopyRelationTo(cstate, cstate->rel, NULL, &processed);
}
else
{
@@ -1116,6 +1142,73 @@ DoCopyTo(CopyToState cstate)
}
/*
+ * Scans a single table and exports its rows to the COPY destination.
+ *
+ * rel is the table that is actually scanned.  root_rel is NULL when rel is
+ * copied directly.  When rel is a partition, root_rel is the partitioned
+ * table being copied, so that tuples are sent in root_rel's rowtype, which
+ * might differ from that of an individual partition.
+ *
+ * *processed is incremented once for every row sent.  It is not reset here,
+ * so a caller that invokes this function for several partitions in turn
+ * ends up with a running total.
+ */
+static void
+CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *processed)
+{
+ TupleTableSlot *slot;
+ TableScanDesc scandesc;
+ AttrMap *map = NULL;
+ TupleTableSlot *root_slot = NULL;
+
+ /* Scan rel under the active snapshot, using a slot matching rel. */
+ scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL);
+ slot = table_slot_create(rel, NULL);
+
+ /*
+ * If we are exporting partition data here, check whether tuples need to
+ * be converted to the root table's rowtype, because a partition might
+ * have a column order different from that of its root table.  map stays
+ * NULL when no conversion is to be done (presumably that is what
+ * build_attrmap_by_name_if_req() returns when the layouts already match).
+ */
+ if (root_rel != NULL)
+ {
+ root_slot = table_slot_create(root_rel, NULL);
+ map = build_attrmap_by_name_if_req(RelationGetDescr(root_rel),
+ RelationGetDescr(rel),
+ false);
+ }
+
+ while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
+ {
+ TupleTableSlot *copyslot;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* Emit the converted tuple if a map exists, else the scanned one. */
+ if (map != NULL)
+ copyslot = execute_attr_map_slot(map, slot, root_slot);
+ else
+ {
+ /* Deconstruct the tuple */
+ slot_getallattrs(slot);
+ copyslot = slot;
+ }
+
+ /* Format and send the data */
+ CopyOneRowTo(cstate, copyslot);
+
+ /*
+ * Increment the number of processed tuples, and report the progress.
+ */
+ pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
+ ++(*processed));
+ }
+
+ /* Release everything allocated above; the scan is ended last. */
+ ExecDropSingleTupleTableSlot(slot);
+
+ if (root_slot != NULL)
+ ExecDropSingleTupleTableSlot(root_slot);
+
+ if (map != NULL)
+ free_attrmap(map);
+
+ table_endscan(scandesc);
+}
+
+/*
* Emit one row during DoCopyTo().
*/
static inline void