summary | refs | log | tree | commit | diff
path: root/src/backend/storage/smgr/md.c
diff options
context:
space:
mode:
author: Michael Paquier <michael@paquier.xyz> 2025-11-13 12:41:40 +0900
committer: Michael Paquier <michael@paquier.xyz> 2025-11-13 12:41:40 +0900
commit: 84fb27511dbeaa0fb0d48249083d71f6cbbd4b98 (patch)
tree: 48b648a79136eff087d322cc8f7099c24a832455 /src/backend/storage/smgr/md.c
parent: 705601c5aeab56aef62dd69ac2b7acf662f08e9c (diff)
Replace off_t by pgoff_t in I/O routines
PostgreSQL's Windows port has never been able to handle files larger than 2GB due to the use of off_t for file offsets, which is only 32 bits wide on Windows. This causes signed integer overflow at exactly 2^31 bytes when trying to handle files larger than 2GB, for the routines touched by this commit. Note that large files are forbidden by ./configure (3c6248a828af) and meson (recent change, see 79cd66f28c65). This restriction also exists in v16 and older versions for the now-dead MSVC scripts. The code base already defines pgoff_t as __int64 (64-bit) on Windows for this purpose, and some function declarations in headers use it, but many internals still rely on off_t. This commit switches more routines to use pgoff_t, offering more portability, for areas mainly related to extending files and storage. These are not critical for WAL segments yet, which currently have a maximum allowed size of 1GB (although this opens the door to allowing a larger size for them). This matters more for segment files if we want to lift the large file restriction in ./configure and meson in the future, a restriction that would make sense to remove once (and if) all traces of off_t are gone from the tree. This can additionally matter for out-of-core code that may want files larger than 2GB in places where off_t is four bytes in size. Note that off_t is still used in other parts of the tree like buffile.c, WAL sender/receiver, base backup, pg_combinebackup, etc. These other code paths can be addressed separately, and updating them will be required if we want to remove the large file restriction in the future. This commit is, hopefully, a good first cut in itself towards more portability. On Unix-like systems, pgoff_t is defined as off_t, so this change only affects Windows behavior.
Author: Bryan Green <dbryan.green@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Discussion: https://postgr.es/m/0f238ff4-c442-42f5-adb8-01b762c94ca1@gmail.com
Diffstat (limited to 'src/backend/storage/smgr/md.c')
-rw-r--r-- src/backend/storage/smgr/md.c 50
1 files changed, 25 insertions, 25 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 235ba7e1914..e3f335a8340 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -487,7 +487,7 @@ void
mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
const void *buffer, bool skipFsync)
{
- off_t seekpos;
+ pgoff_t seekpos;
int nbytes;
MdfdVec *v;
@@ -515,9 +515,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
- seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
- Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+ Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
{
@@ -578,7 +578,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
while (remblocks > 0)
{
BlockNumber segstartblock = curblocknum % ((BlockNumber) RELSEG_SIZE);
- off_t seekpos = (off_t) BLCKSZ * segstartblock;
+ pgoff_t seekpos = (pgoff_t) BLCKSZ * segstartblock;
int numblocks;
if (segstartblock + remblocks > RELSEG_SIZE)
@@ -607,7 +607,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
int ret;
ret = FileFallocate(v->mdfd_vfd,
- seekpos, (off_t) BLCKSZ * numblocks,
+ seekpos, (pgoff_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
if (ret != 0)
{
@@ -630,7 +630,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum,
* whole length of the extension.
*/
ret = FileZero(v->mdfd_vfd,
- seekpos, (off_t) BLCKSZ * numblocks,
+ seekpos, (pgoff_t) BLCKSZ * numblocks,
WAIT_EVENT_DATA_FILE_EXTEND);
if (ret < 0)
ereport(ERROR,
@@ -745,7 +745,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
while (nblocks > 0)
{
- off_t seekpos;
+ pgoff_t seekpos;
MdfdVec *v;
int nblocks_this_segment;
@@ -754,9 +754,9 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
if (v == NULL)
return false;
- seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
- Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+ Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
nblocks_this_segment =
Min(nblocks,
@@ -851,7 +851,7 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
{
struct iovec iov[PG_IOV_MAX];
int iovcnt;
- off_t seekpos;
+ pgoff_t seekpos;
int nbytes;
MdfdVec *v;
BlockNumber nblocks_this_segment;
@@ -861,9 +861,9 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
v = _mdfd_getseg(reln, forknum, blocknum, false,
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
- seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
- Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+ Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
nblocks_this_segment =
Min(nblocks,
@@ -986,7 +986,7 @@ mdstartreadv(PgAioHandle *ioh,
SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
void **buffers, BlockNumber nblocks)
{
- off_t seekpos;
+ pgoff_t seekpos;
MdfdVec *v;
BlockNumber nblocks_this_segment;
struct iovec *iov;
@@ -996,9 +996,9 @@ mdstartreadv(PgAioHandle *ioh,
v = _mdfd_getseg(reln, forknum, blocknum, false,
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
- seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
- Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+ Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
nblocks_this_segment =
Min(nblocks,
@@ -1068,7 +1068,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
{
struct iovec iov[PG_IOV_MAX];
int iovcnt;
- off_t seekpos;
+ pgoff_t seekpos;
int nbytes;
MdfdVec *v;
BlockNumber nblocks_this_segment;
@@ -1078,9 +1078,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
v = _mdfd_getseg(reln, forknum, blocknum, skipFsync,
EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY);
- seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
- Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+ Assert(seekpos < (pgoff_t) BLCKSZ * RELSEG_SIZE);
nblocks_this_segment =
Min(nblocks,
@@ -1173,7 +1173,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
while (nblocks > 0)
{
BlockNumber nflush = nblocks;
- off_t seekpos;
+ pgoff_t seekpos;
MdfdVec *v;
int segnum_start,
segnum_end;
@@ -1202,9 +1202,9 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum,
Assert(nflush >= 1);
Assert(nflush <= nblocks);
- seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ seekpos = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
- FileWriteback(v->mdfd_vfd, seekpos, (off_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
+ FileWriteback(v->mdfd_vfd, seekpos, (pgoff_t) BLCKSZ * nflush, WAIT_EVENT_DATA_FILE_FLUSH);
nblocks -= nflush;
blocknum += nflush;
@@ -1348,7 +1348,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum,
*/
BlockNumber lastsegblocks = nblocks - priorblocks;
- if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
+ if (FileTruncate(v->mdfd_vfd, (pgoff_t) lastsegblocks * BLCKSZ, WAIT_EVENT_DATA_FILE_TRUNCATE) < 0)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not truncate file \"%s\" to %u blocks: %m",
@@ -1484,9 +1484,9 @@ mdfd(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, uint32 *off)
v = _mdfd_getseg(reln, forknum, blocknum, false,
EXTENSION_FAIL);
- *off = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+ *off = (pgoff_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
- Assert(*off < (off_t) BLCKSZ * RELSEG_SIZE);
+ Assert(*off < (pgoff_t) BLCKSZ * RELSEG_SIZE);
return FileGetRawDesc(v->mdfd_vfd);
}
@@ -1868,7 +1868,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
static BlockNumber
_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
{
- off_t len;
+ pgoff_t len;
len = FileSize(seg->mdfd_vfd);
if (len < 0)