diff options
| author | Robert Haas <rhaas@postgresql.org> | 2022-03-24 14:36:06 -0400 |
|---|---|---|
| committer | Robert Haas <rhaas@postgresql.org> | 2022-03-24 14:36:06 -0400 |
| commit | 1ce14b6b2fe4af9ac7d7f90eb46c77b2e6deb2de (patch) | |
| tree | ee1c6741a5ccfdf14c781b541d022ca08984a920 /src/include | |
| parent | c0f99bb520da577f34cf7c10e1ea4aab727f08c7 (diff) | |
Fix possible recovery trouble if TRUNCATE overlaps a checkpoint.
If TRUNCATE causes some buffers to be invalidated and thus the
checkpoint does not flush them, TRUNCATE must also ensure that the
corresponding files are truncated on disk. Otherwise, a replay
from the checkpoint might find that the buffers exist but have
the wrong contents, which may cause replay to fail.
Report by Teja Mupparti. Patch by Kyotaro Horiguchi, per a design
suggestion from Heikki Linnakangas, with some changes to the
comments by me. Review of this and a prior patch that approached
the issue differently by Heikki Linnakangas, Andres Freund, Álvaro
Herrera, Masahiko Sawada, and Tom Lane.
Discussion: http://postgr.es/m/BYAPR06MB6373BF50B469CA393C614257ABF00@BYAPR06MB6373.namprd06.prod.outlook.com
Diffstat (limited to 'src/include')
| -rw-r--r-- | src/include/storage/proc.h | 37 | ||||
| -rw-r--r-- | src/include/storage/procarray.h | 5 |
2 files changed, 39 insertions, 3 deletions
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index b3ea1a25860..5798b91186f 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -84,6 +84,41 @@ struct XidCache #define INVALID_PGPROCNO PG_INT32_MAX /* + * Flags for PGPROC.delayChkpt + * + * These flags can be used to delay the start or completion of a checkpoint + * for short periods. A flag is in effect if the corresponding bit is set in + * the PGPROC of any backend. + * + * For our purposes here, a checkpoint has three phases: (1) determine the + * location to which the redo pointer will be moved, (2) write all the + * data durably to disk, and (3) WAL-log the checkpoint. + * + * Setting DELAY_CHKPT_START prevents the system from moving from phase 1 + * to phase 2. This is useful when we are performing a WAL-logged modification + * of data that will be flushed to disk in phase 2. By setting this flag + * before writing WAL and clearing it after we've both written WAL and + * performed the corresponding modification, we ensure that if the WAL record + * is inserted prior to the new redo point, the corresponding data changes will + * also be flushed to disk before the checkpoint can complete. (In the + * extremely common case where the data being modified is in shared buffers + * and we acquire an exclusive content lock on the relevant buffers before + * writing WAL, this mechanism is not needed, because phase 2 will block + * until we release the content lock and then flush the modified data to + * disk.) + * + * Setting DELAY_CHKPT_COMPLETE prevents the system from moving from phase 2 + * to phase 3. This is useful if we are performing a WAL-logged operation that + * might invalidate buffers, such as relation truncation. In this case, we need + * to ensure that any buffers which were invalidated and thus not flushed by + * the checkpoint are actually destroyed on disk. 
Replay can cope with a file + * or block that doesn't exist, but not with a block that has the wrong + * contents. + */ +#define DELAY_CHKPT_START (1<<0) +#define DELAY_CHKPT_COMPLETE (1<<1) + +/* * Each backend has a PGPROC struct in shared memory. There is also a list of * currently-unused PGPROC structs that will be reallocated to new backends. * @@ -149,7 +184,7 @@ struct PGPROC LOCKMASK heldLocks; /* bitmask for lock types already held on this * lock object by this backend */ - bool delayChkpt; /* true if this proc delays checkpoint start */ + int delayChkpt; /* for DELAY_CHKPT_* flags */ /* * Info to allow us to wait for synchronous replication, if needed. diff --git a/src/include/storage/procarray.h b/src/include/storage/procarray.h index 200ef8db279..4dee2dab10c 100644 --- a/src/include/storage/procarray.h +++ b/src/include/storage/procarray.h @@ -92,8 +92,9 @@ extern TransactionId GetOldestXmin(Relation rel, int flags); extern TransactionId GetOldestActiveTransactionId(void); extern TransactionId GetOldestSafeDecodingTransactionId(bool catalogOnly); -extern VirtualTransactionId *GetVirtualXIDsDelayingChkpt(int *nvxids); -extern bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, int nvxids); +extern VirtualTransactionId *GetVirtualXIDsDelayingChkpt(int *nvxids, int type); +extern bool HaveVirtualXIDsDelayingChkpt(VirtualTransactionId *vxids, + int nvxids, int type); extern PGPROC *BackendPidGetProc(int pid); extern PGPROC *BackendPidGetProcWithLock(int pid); |
