From ab55d742eb7162c22ee60f1e15e07d2a60063c4e Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Fri, 11 Jun 2021 16:12:36 -0400
Subject: Fix multiple crasher bugs in partitioned-table replication logic.

apply_handle_tuple_routing(), having detected and reported that the
tuple it needed to update didn't exist, tried to update that tuple
anyway, leading to a null-pointer dereference.

logicalrep_partition_open() failed to ensure that the
LogicalRepPartMapEntry it built for a partition was fully
independent of that for the partition root, leading to
trouble if the root entry was later freed or rebuilt.

Meanwhile, on the publisher's side, pgoutput_change() sometimes
attempted to apply execute_attr_map_tuple() to a NULL tuple.

The first of these was reported by Sergey Bernikov in bug #17055;
I found the other two while developing some test cases for this
sadly under-tested code.

Diagnosis and patch for the first issue by Amit Langote; patches
for the others by me; new test cases by me. Back-patch to v13
where this logic came in.

Discussion: https://postgr.es/m/17055-9ba800ec8522668b@postgresql.org
---
 src/backend/replication/logical/worker.c | 48 ++++++++++++++++++--------------
 1 file changed, 27 insertions(+), 21 deletions(-)

(limited to 'src/backend/replication/logical/worker.c')

diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c
index 98c26002e83..689a66cc72d 100644
--- a/src/backend/replication/logical/worker.c
+++ b/src/backend/replication/logical/worker.c
@@ -1477,12 +1477,13 @@ apply_handle_update_internal(ApplyExecutionData *edata,
 	else
 	{
 		/*
-		 * The tuple to be updated could not be found.
+		 * The tuple to be updated could not be found. Do nothing except for
+		 * emitting a log message.
 		 *
-		 * TODO what to do here, change the log level to LOG perhaps?
+		 * XXX should this be promoted to ereport(LOG) perhaps?
 		 */
 		elog(DEBUG1,
-			 "logical replication did not find row for update "
+			 "logical replication did not find row to be updated "
 			 "in replication target relation \"%s\"",
 			 RelationGetRelationName(localrel));
 	}
@@ -1589,9 +1590,14 @@ apply_handle_delete_internal(ApplyExecutionData *edata,
 	}
 	else
 	{
-		/* The tuple to be deleted could not be found. */
+		/*
+		 * The tuple to be deleted could not be found. Do nothing except for
+		 * emitting a log message.
+		 *
+		 * XXX should this be promoted to ereport(LOG) perhaps?
+		 */
 		elog(DEBUG1,
-			 "logical replication did not find row for delete "
+			 "logical replication did not find row to be deleted "
 			 "in replication target relation \"%s\"",
 			 RelationGetRelationName(localrel));
 	}
@@ -1728,30 +1734,30 @@ apply_handle_tuple_routing(ApplyExecutionData *edata,
 				found = FindReplTupleInLocalRel(estate, partrel,
 												&part_entry->remoterel,
 												remoteslot_part, &localslot);
-
-				oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
-				if (found)
-				{
-					/* Apply the update. */
-					slot_modify_data(remoteslot_part, localslot,
-									 part_entry,
-									 newtup);
-					MemoryContextSwitchTo(oldctx);
-				}
-				else
+				if (!found)
 				{
 					/*
-					 * The tuple to be updated could not be found.
+					 * The tuple to be updated could not be found. Do nothing
+					 * except for emitting a log message.
 					 *
-					 * TODO what to do here, change the log level to LOG
-					 * perhaps?
+					 * XXX should this be promoted to ereport(LOG) perhaps?
 					 */
 					elog(DEBUG1,
-						 "logical replication did not find row for update "
-						 "in replication target relation \"%s\"",
+						 "logical replication did not find row to be updated "
+						 "in replication target relation's partition \"%s\"",
 						 RelationGetRelationName(partrel));
+					return;
 				}
 
+				/*
+				 * Apply the update to the local tuple, putting the result in
+				 * remoteslot_part.
+				 */
+				oldctx = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
+				slot_modify_data(remoteslot_part, localslot, part_entry,
+								 newtup);
+				MemoryContextSwitchTo(oldctx);
+
 				/*
 				 * Does the updated tuple still satisfy the current
 				 * partition's constraint?
--
cgit v1.2.3