From 8c58624df46222d4d09c5655d8350f3b037880c8 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Wed, 8 Feb 2023 07:58:25 +0530 Subject: Fix the logical replication timeout during large DDLs. The DDLs like Refresh Materialized views that generate lots of temporary data due to rewrite rules may not be processed by output plugins (for example pgoutput). So, we won't send keep-alive messages for a long time while processing such commands and that can lead the subscriber side to timeout. We have previously fixed a similar case for large transactions in commit f95d53eded where the output plugin filters all or most of the changes but missed to handle the DDLs. We decided not to backpatch this as this adds a new callback in the existing exposed structure and moreover, users can increase the wal_sender_timeout and wal_receiver_timeout to avoid this problem. Author: Wang wei, Hou Zhijie Reviewed-by: Peter Smith, Ashutosh Bapat, Shi yu, Amit Kapila Discussion: https://postgr.es/m/OS3PR01MB6275478E5D29E4A563302D3D9E2B9@OS3PR01MB6275.jpnprd01.prod.outlook.com Discussion: https://postgr.es/m/CAA5-nLARN7-3SLU_QUxfy510pmrYK6JJb=bk3hcgemAM_pAv+w@mail.gmail.com --- src/backend/replication/logical/reorderbuffer.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src/backend/replication/logical/reorderbuffer.c') diff --git a/src/backend/replication/logical/reorderbuffer.c b/src/backend/replication/logical/reorderbuffer.c index 0468d12936f..d5f90a5f5d2 100644 --- a/src/backend/replication/logical/reorderbuffer.c +++ b/src/backend/replication/logical/reorderbuffer.c @@ -2100,6 +2100,8 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, PG_TRY(); { ReorderBufferChange *change; + int changes_count = 0; /* used to accumulate the number of + * changes */ if (using_subtxn) BeginInternalSubTransaction(streaming ? "stream" : "replay"); @@ -2440,6 +2442,24 @@ ReorderBufferProcessTXN(ReorderBuffer *rb, ReorderBufferTXN *txn, elog(ERROR, "tuplecid value in changequeue"); break; } + + /* + * It is possible that the data is not sent to downstream for a + * long time either because the output plugin filtered it or there + * is a DDL that generates a lot of data that is not processed by + * the plugin. So, in such cases, the downstream can timeout. To + * avoid that we try to send a keepalive message if required. + * Trying to send a keepalive message after every change has some + * overhead, but testing showed there is no noticeable overhead if + * we do it after every ~100 changes. + */ +#define CHANGES_THRESHOLD 100 + + if (++changes_count >= CHANGES_THRESHOLD) + { + rb->update_progress_txn(rb, txn, change->lsn); + changes_count = 0; + } } /* speculative insertion record must be freed by now */ -- cgit v1.2.3