diff options
| author | Michael Paquier <michael@paquier.xyz> | 2025-11-13 12:41:40 +0900 |
|---|---|---|
| committer | Michael Paquier <michael@paquier.xyz> | 2025-11-13 12:41:40 +0900 |
| commit | 84fb27511dbeaa0fb0d48249083d71f6cbbd4b98 (patch) | |
| tree | 48b648a79136eff087d322cc8f7099c24a832455 /src/backend/storage/smgr/md.c | |
| parent | 705601c5aeab56aef62dd69ac2b7acf662f08e9c (diff) | |
Replace off_t by pgoff_t in I/O routines
PostgreSQL's Windows port has never been able to handle files larger
than 2GB due to the use of off_t for file offsets, only 32-bit on
Windows. This causes signed integer overflow at exactly 2^31 bytes when
trying to handle files larger than 2GB, for the routines touched by this
commit.
Note that large files are forbidden by ./configure (3c6248a828af) and
meson (recent change, see 79cd66f28c65). This restriction also exists
in v16 and older versions for the now-dead MSVC scripts.
The code base already defines pgoff_t as __int64 (64-bit) on Windows for
this purpose, and some function declarations in headers use it, but many
internals still rely on off_t. This commit switches more routines to
use pgoff_t, offering more portability, for areas mainly related to file
extensions and storage.
These are not critical for WAL segments yet, which have currently a
maximum size allowed of 1GB (well, this opens the door at allowing a
larger size for them). This matters more for segment files if we want
to lift the large file restriction in ./configure and meson in the
future, which would make sense to remove once/if all traces of off_t are
gone from the tree. This can additionally matter for out-of-core code
that may want files larger than 2GB in places where off_t is four bytes
in size.
Note that off_t is still used in other parts of the tree like
buffile.c, WAL sender/receiver, base backup, pg_combinebackup, etc.
These other code paths can be addressed separately, and their update
will be required if we want to remove the large file restriction in the
future. This commit is a good first cut in itself towards more
portability, hopefully.
On Unix-like systems, pgoff_t is defined as off_t, so this change only
affects Windows behavior.
Author: Bryan Green <dbryan.green@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/0f238ff4-c442-42f5-adb8-01b762c94ca1@gmail.com
Diffstat (limited to 'src/backend/storage/smgr/md.c')
| -rw-r--r-- | src/backend/storage/smgr/md.c | 50 |
1 files changed, 25 insertions, 25 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 235ba7e1914..e3f335a8340 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -487,7 +487,7 @@ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync) { - off_t seekpos; + pgoff_t seekpos; int nbytes; MdfdVec *v; @@ -515,9 +515,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); - Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); + Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE); if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ) { @@ -578,7 +578,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, while (remblocks > 0) { BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE); - off_t seekpos = (off_t) BLCKSZ * segstartblock; + pgoff_t seekpos = (pgoff_t) BLCKSZ * segstartblock; int numblocks; if (segstartblock + remblocks > RELSEG_SIZE) @@ -607,7 +607,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, int ret; ret = FileFallocate(v->mdfd_vfd, - seekpos, (off_t) BLCKSZ * numblocks, + seekpos, (pgoff_t) BLCKSZ * numblocks, WAIT_EVENT_DATA_FILE_EXTEND); if (ret != 0) { @@ -630,7 +630,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, * whole length of the extension. */ ret = FileZero(v->mdfd_vfd, - seekpos, (off_t) BLCKSZ * numblocks, + seekpos, (pgoff_t) BLCKSZ * numblocks, WAIT_EVENT_DATA_FILE_EXTEND); if (ret < 0) ereport(ERROR, @@ -745,7 +745,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, while (nblocks > 0) { - off_t seekpos; + pgoff_t seekpos; MdfdVec *v; int nblocks_this_segment; @@ -754,9 +754,9 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, if (v == NULL) return false; - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); - Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); + Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE); nblocks_this_segment = Min(nblocks, @@ -851,7 +851,7 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, { struct iovec iov[PG_IOV_MAX]; int iovcnt; - off_t seekpos; + pgoff_t seekpos; int nbytes; MdfdVec *v; BlockNumber nblocks_this_segment; @@ -861,9 +861,9 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); - Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); + Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE); nblocks_this_segment = Min(nblocks, @@ -986,7 +986,7 @@ mdstartreadv(PgAioHandle *ioh, SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { - off_t seekpos; + pgoff_t seekpos; MdfdVec *v; BlockNumber nblocks_this_segment; struct iovec *iov; @@ -996,9 +996,9 @@ mdstartreadv(PgAioHandle *ioh, v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); - Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); + Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE); nblocks_this_segment = Min(nblocks, @@ -1068,7 +1068,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, { struct iovec iov[PG_IOV_MAX]; int iovcnt; - off_t seekpos; + pgoff_t seekpos; int nbytes; MdfdVec *v; BlockNumber nblocks_this_segment; @@ -1078,9 +1078,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); - Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); + Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE); nblocks_this_segment = Min(nblocks, @@ -1173,7 +1173,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, while (nblocks > 0) { BlockNumber nflush = nblocks; - off_t seekpos; + pgoff_t seekpos; MdfdVec *v; int segnum_start, segnum_end; @@ -1202,9 +1202,9 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, Assert(nflush >= 1); Assert(nflush <= nblocks); - seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); - FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH); + FileWriteback(v->mdfd_vfd, seekpos, (pgoff_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH); nblocks -= nflush; blocknum += nflush; @@ -1348,7 +1348,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, */ BlockNumber lastsegblocks = nblocks - priorblocks; - if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) + if (FileTruncate(v->mdfd_vfd, (pgoff_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not truncate file \"%s\" to %u blocks: %m", @@ -1484,9 +1484,9 @@ mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off) v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL); - *off = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); + *off = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); - Assert(*off < (off_t) BLCKSZ * RELSEG_SIZE); + Assert(*off < (pgoff_t) BLCKSZ * RELSEG_SIZE); return FileGetRawDesc(v->mdfd_vfd); } @@ -1868,7 +1868,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { - off_t len; + pgoff_t len; len = FileSize(seg->mdfd_vfd); if (len < 0) |
