From 4bea91f21f61d01bd40a4191a4a8c82d0959fffe Mon Sep 17 00:00:00 2001 From: Masahiko Sawada Date: Mon, 20 Oct 2025 10:38:52 -0700 Subject: Support COPY TO for partitioned tables. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, COPY TO command didn't support directly specifying partitioned tables so users had to use COPY (SELECT ...) TO variant. This commit adds direct COPY TO support for partitioned tables, improving both usability and performance. Performance tests show it's faster than the COPY (SELECT ...) TO variant as it avoids the overheads of query processing and sending results to the COPY TO command. When used with partitioned tables, COPY TO copies the same rows as SELECT * FROM table. Row-level security policies of the partitioned table are applied in the same way as when executing COPY TO on a plain table. Author: jian he Reviewed-by: vignesh C Reviewed-by: David Rowley Reviewed-by: Melih Mutlu Reviewed-by: Kirill Reshke Reviewed-by: Atsushi Torikoshi Reviewed-by: Álvaro Herrera Reviewed-by: Masahiko Sawada Reviewed-by: Chao Li Discussion: https://postgr.es/m/CACJufxEZt%2BG19Ors3bQUq-42-61__C%3Dy5k2wk%3DsHEFRusu7%3DiQ%40mail.gmail.com --- src/backend/commands/copy.c | 6 +- src/backend/commands/copyto.c | 153 ++++++++++++++++++++++++------ src/test/regress/expected/copy.out | 20 ++++ src/test/regress/expected/rowsecurity.out | 21 ++++ src/test/regress/sql/copy.sql | 14 +++ src/test/regress/sql/rowsecurity.sql | 3 + 6 files changed, 186 insertions(+), 31 deletions(-) (limited to 'src') diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index fae9c41db65..44020d0ae80 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -251,11 +251,15 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt, * relation which we have opened and locked. Use "ONLY" so that * COPY retrieves rows from only the target table not any * inheritance children, the same as when RLS doesn't apply. + * + * However, when copying data from a partitioned table, we don't + * use "ONLY", since we need to retrieve rows from its descendant + * tables too. */ from = makeRangeVar(get_namespace_name(RelationGetNamespace(rel)), pstrdup(RelationGetRelationName(rel)), -1); - from->inh = false; /* apply ONLY */ + from->inh = (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); /* Build query */ select = makeNode(SelectStmt); diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c index e5781155cdf..a1919c6db43 100644 --- a/src/backend/commands/copyto.c +++ b/src/backend/commands/copyto.c @@ -18,7 +18,9 @@ #include #include +#include "access/table.h" #include "access/tableam.h" +#include "catalog/pg_inherits.h" #include "commands/copyapi.h" #include "commands/progress.h" #include "executor/execdesc.h" @@ -86,6 +88,7 @@ typedef struct CopyToStateData CopyFormatOptions opts; Node *whereClause; /* WHERE condition (or NULL) */ + List *partitions; /* OID list of partitions to copy data from */ /* * Working state @@ -116,6 +119,8 @@ static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot); static void CopyAttributeOutText(CopyToState cstate, const char *string); static void CopyAttributeOutCSV(CopyToState cstate, const char *string, bool use_quote); +static void CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, + uint64 *processed); /* built-in format-specific routines */ static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc); @@ -602,6 +607,10 @@ EndCopy(CopyToState cstate) pgstat_progress_end_command(); MemoryContextDelete(cstate->copycontext); + + if (cstate->partitions) + list_free(cstate->partitions); + pfree(cstate); } @@ -643,6 +652,7 @@ BeginCopyTo(ParseState *pstate, PROGRESS_COPY_COMMAND_TO, 0 }; + List *children = NIL; if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION) { @@ -673,11 +683,34 @@ BeginCopyTo(ParseState *pstate, errmsg("cannot copy from sequence \"%s\"", RelationGetRelationName(rel)))); else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) - ereport(ERROR, - (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("cannot copy from partitioned table \"%s\"", - RelationGetRelationName(rel)), - errhint("Try the COPY (SELECT ...) TO variant."))); + { + /* + * Collect OIDs of relation containing data, so that later + * DoCopyTo can copy the data from them. + */ + children = find_all_inheritors(RelationGetRelid(rel), AccessShareLock, NULL); + + foreach_oid(child, children) + { + char relkind = get_rel_relkind(child); + + if (relkind == RELKIND_FOREIGN_TABLE) + { + char *relation_name = get_rel_name(child); + + ereport(ERROR, + errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("cannot copy from foreign table \"%s\"", relation_name), + errdetail("Partition \"%s\" is a foreign table in partitioned table \"%s\"", + relation_name, RelationGetRelationName(rel)), + errhint("Try the COPY (SELECT ...) TO variant.")); + } + + /* Exclude tables with no data */ + if (RELKIND_HAS_PARTITIONS(relkind)) + children = foreach_delete_current(children, child); + } + } else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), @@ -713,6 +746,7 @@ BeginCopyTo(ParseState *pstate, cstate->rel = rel; tupDesc = RelationGetDescr(cstate->rel); + cstate->partitions = children; } else { @@ -722,6 +756,7 @@ BeginCopyTo(ParseState *pstate, DestReceiver *dest; cstate->rel = NULL; + cstate->partitions = NIL; /* * Run parse analysis and rewrite. Note this also acquires sufficient @@ -1030,7 +1065,7 @@ DoCopyTo(CopyToState cstate) TupleDesc tupDesc; int num_phys_attrs; ListCell *cur; - uint64 processed; + uint64 processed = 0; if (fe_copy) SendCopyBegin(cstate); @@ -1070,33 +1105,24 @@ DoCopyTo(CopyToState cstate) if (cstate->rel) { - TupleTableSlot *slot; - TableScanDesc scandesc; - - scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL); - slot = table_slot_create(cstate->rel, NULL); - - processed = 0; - while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot)) + /* + * If COPY TO source table is a partitioned table, then open each + * partition and process each individual partition. + */ + if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { - CHECK_FOR_INTERRUPTS(); - - /* Deconstruct the tuple ... */ - slot_getallattrs(slot); - - /* Format and send the data */ - CopyOneRowTo(cstate, slot); + foreach_oid(child, cstate->partitions) + { + Relation scan_rel; - /* - * Increment the number of processed tuples, and report the - * progress. - */ - pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, - ++processed); + /* We already got the lock in BeginCopyTo */ + scan_rel = table_open(child, NoLock); + CopyRelationTo(cstate, scan_rel, cstate->rel, &processed); + table_close(scan_rel, NoLock); + } } - - ExecDropSingleTupleTableSlot(slot); - table_endscan(scandesc); + else + CopyRelationTo(cstate, cstate->rel, NULL, &processed); } else { @@ -1115,6 +1141,73 @@ DoCopyTo(CopyToState cstate) return processed; } +/* + * Scans a single table and exports its rows to the COPY destination. + * + * root_rel can be set to the root table of rel if rel is a partition + * table so that we can send tuples in root_rel's rowtype, which might + * differ from individual partitions. +*/ +static void +CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *processed) +{ + TupleTableSlot *slot; + TableScanDesc scandesc; + AttrMap *map = NULL; + TupleTableSlot *root_slot = NULL; + + scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL); + slot = table_slot_create(rel, NULL); + + /* + * If we are exporting partition data here, we check if converting tuples + * to the root table's rowtype, because a partition might have column + * order different than its root table. + */ + if (root_rel != NULL) + { + root_slot = table_slot_create(root_rel, NULL); + map = build_attrmap_by_name_if_req(RelationGetDescr(root_rel), + RelationGetDescr(rel), + false); + } + + while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot)) + { + TupleTableSlot *copyslot; + + CHECK_FOR_INTERRUPTS(); + + if (map != NULL) + copyslot = execute_attr_map_slot(map, slot, root_slot); + else + { + /* Deconstruct the tuple */ + slot_getallattrs(slot); + copyslot = slot; + } + + /* Format and send the data */ + CopyOneRowTo(cstate, copyslot); + + /* + * Increment the number of processed tuples, and report the progress. + */ + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, + ++(*processed)); + } + + ExecDropSingleTupleTableSlot(slot); + + if (root_slot != NULL) + ExecDropSingleTupleTableSlot(root_slot); + + if (map != NULL) + free_attrmap(map); + + table_endscan(scandesc); +} + /* * Emit one row during DoCopyTo(). */ diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out index ac66eb55aee..24e0f472f14 100644 --- a/src/test/regress/expected/copy.out +++ b/src/test/regress/expected/copy.out @@ -373,3 +373,23 @@ COPY copytest_mv(id) TO stdout WITH (header); id 1 DROP MATERIALIZED VIEW copytest_mv; +-- Tests for COPY TO with partitioned tables. +-- The child table pp_2 has a different column order than the root table pp. +-- Check if COPY TO exports tuples as the root table's column order. +CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id); +CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id); +CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id); +ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5); +ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10); +CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5); +CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10); +INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g; +COPY pp TO stdout(header); +id val +1 11 +2 12 +3 13 +4 14 +5 15 +6 16 +DROP TABLE PP; diff --git a/src/test/regress/expected/rowsecurity.out b/src/test/regress/expected/rowsecurity.out index 5a172c5d91c..42b78a24603 100644 --- a/src/test/regress/expected/rowsecurity.out +++ b/src/test/regress/expected/rowsecurity.out @@ -986,6 +986,11 @@ NOTICE: f_leak => my first satire 9 | 11 | 1 | regress_rls_dave | awesome science fiction (4 rows) +COPY part_document TO stdout WITH (DELIMITER ','); +1,11,1,regress_rls_bob,my first novel +6,11,1,regress_rls_carol,great science fiction +9,11,1,regress_rls_dave,awesome science fiction +4,55,1,regress_rls_bob,my first satire EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); QUERY PLAN ------------------------------------------------------------------------- @@ -1028,6 +1033,17 @@ NOTICE: f_leak => awesome technology book 10 | 99 | 2 | regress_rls_dave | awesome technology book (10 rows) +COPY part_document TO stdout WITH (DELIMITER ','); +1,11,1,regress_rls_bob,my first novel +2,11,2,regress_rls_bob,my second novel +6,11,1,regress_rls_carol,great science fiction +9,11,1,regress_rls_dave,awesome science fiction +4,55,1,regress_rls_bob,my first satire +8,55,2,regress_rls_carol,great satire +3,99,2,regress_rls_bob,my science textbook +5,99,2,regress_rls_bob,my history book +7,99,2,regress_rls_carol,great technology book +10,99,2,regress_rls_dave,awesome technology book EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); QUERY PLAN ------------------------------------------------------------------------- @@ -1058,6 +1074,11 @@ NOTICE: f_leak => awesome science fiction 9 | 11 | 1 | regress_rls_dave | awesome science fiction (4 rows) +COPY part_document TO stdout WITH (DELIMITER ','); +1,11,1,regress_rls_bob,my first novel +2,11,2,regress_rls_bob,my second novel +6,11,1,regress_rls_carol,great science fiction +9,11,1,regress_rls_dave,awesome science fiction EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); QUERY PLAN ---------------------------------------------------------------------------------- diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql index a1316c73bac..676a8b342b5 100644 --- a/src/test/regress/sql/copy.sql +++ b/src/test/regress/sql/copy.sql @@ -405,3 +405,17 @@ COPY copytest_mv(id) TO stdout WITH (header); REFRESH MATERIALIZED VIEW copytest_mv; COPY copytest_mv(id) TO stdout WITH (header); DROP MATERIALIZED VIEW copytest_mv; + +-- Tests for COPY TO with partitioned tables. +-- The child table pp_2 has a different column order than the root table pp. +-- Check if COPY TO exports tuples as the root table's column order. +CREATE TABLE pp (id int,val int) PARTITION BY RANGE (id); +CREATE TABLE pp_1 (val int, id int) PARTITION BY RANGE (id); +CREATE TABLE pp_2 (id int, val int) PARTITION BY RANGE (id); +ALTER TABLE pp ATTACH PARTITION pp_1 FOR VALUES FROM (1) TO (5); +ALTER TABLE pp ATTACH PARTITION pp_2 FOR VALUES FROM (5) TO (10); +CREATE TABLE pp_15 PARTITION OF pp_1 FOR VALUES FROM (1) TO (5); +CREATE TABLE pp_510 PARTITION OF pp_2 FOR VALUES FROM (5) TO (10); +INSERT INTO pp SELECT g, 10 + g FROM generate_series(1,6) g; +COPY pp TO stdout(header); +DROP TABLE PP; diff --git a/src/test/regress/sql/rowsecurity.sql b/src/test/regress/sql/rowsecurity.sql index 21ac0ca51ee..2d1be543391 100644 --- a/src/test/regress/sql/rowsecurity.sql +++ b/src/test/regress/sql/rowsecurity.sql @@ -362,16 +362,19 @@ SELECT * FROM pg_policies WHERE schemaname = 'regress_rls_schema' AND tablename SET SESSION AUTHORIZATION regress_rls_bob; SET row_security TO ON; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; +COPY part_document TO stdout WITH (DELIMITER ','); EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); -- viewpoint from regress_rls_carol SET SESSION AUTHORIZATION regress_rls_carol; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; +COPY part_document TO stdout WITH (DELIMITER ','); EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); -- viewpoint from regress_rls_dave SET SESSION AUTHORIZATION regress_rls_dave; SELECT * FROM part_document WHERE f_leak(dtitle) ORDER BY did; +COPY part_document TO stdout WITH (DELIMITER ','); EXPLAIN (COSTS OFF) SELECT * FROM part_document WHERE f_leak(dtitle); -- pp1 ERROR -- cgit v1.2.3