diff options
author | Thomas Munro <tmunro@postgresql.org> | 2021-01-11 14:37:13 +1300 |
---|---|---|
committer | Thomas Munro <tmunro@postgresql.org> | 2021-01-11 15:24:38 +1300 |
commit | 13a021f3e8c99915b3cc0cb2021a948d9c71ff32 (patch) | |
tree | f532b74cfecef4bb150d2853509f105c3fe05e21 /src/port/pwrite.c | |
parent | 01334c92fa09dc496a444a4f206854ef37247258 (diff) |
Provide pg_preadv() and pg_pwritev().
Provide synchronous vectored file I/O routines. These map to preadv()
and pwritev(), with fallback implementations for systems that don't have
them. Also provide a wrapper pg_pwritev_with_retry() that automatically
retries on short writes.
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/CA%2BhUKGJA%2Bu-220VONeoREBXJ9P3S94Y7J%2BkqCnTYmahvZJwM%3Dg%40mail.gmail.com
Diffstat (limited to 'src/port/pwrite.c')
-rw-r--r-- | src/port/pwrite.c | 107 |
1 files changed, 105 insertions, 2 deletions
diff --git a/src/port/pwrite.c b/src/port/pwrite.c
index 282b27115e5..e029f44bc0c 100644
--- a/src/port/pwrite.c
+++ b/src/port/pwrite.c
@@ -1,7 +1,7 @@
 /*-------------------------------------------------------------------------
  *
  * pwrite.c
- * Implementation of pwrite(2) for platforms that lack one.
+ * Implementation of pwrite[v](2) for platforms that lack one.
  *
  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
  *
@@ -9,7 +9,8 @@
  * src/port/pwrite.c
  *
  * Note that this implementation changes the current file position, unlike
- * the POSIX function, so we use the name pg_pwrite().
+ * the POSIX function, so we use the name pg_pwrite(). Likewise for the
+ * iovec version.
  *
  *-------------------------------------------------------------------------
  */
@@ -23,6 +24,9 @@
 #include <unistd.h>
 #endif
 
+#include "port/pg_iovec.h"
+
+#ifndef HAVE_PWRITE
 ssize_t
 pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
 {
@@ -53,3 +57,102 @@ pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
 	return write(fd, buf, size);
 #endif
 }
+#endif
+
+#ifndef HAVE_PWRITEV
+ssize_t
+pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+#ifdef HAVE_WRITEV
+	if (iovcnt == 1)
+		return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);
+	if (lseek(fd, offset, SEEK_SET) < 0)
+		return -1;
+	return writev(fd, iov, iovcnt);
+#else
+	ssize_t sum = 0;
+	ssize_t part;
+
+	for (int i = 0; i < iovcnt; ++i)
+	{
+		part = pg_pwrite(fd, iov[i].iov_base, iov[i].iov_len, offset);
+		if (part < 0)
+		{
+			if (i == 0)
+				return -1;
+			else
+				return sum;
+		}
+		sum += part;
+		offset += part;
+		if (part < iov[i].iov_len)
+			return sum;
+	}
+	return sum;
+#endif
+}
+#endif
+
+/*
+ * A convenience wrapper for pg_pwritev() that retries on partial write. If an
+ * error is returned, it is unspecified how much has been written.
+ */
+ssize_t
+pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+	struct iovec iov_copy[PG_IOV_MAX];
+	ssize_t sum = 0;
+	ssize_t part;
+
+	/* We'd better have space to make a copy, in case we need to retry. */
+	if (iovcnt > PG_IOV_MAX)
+	{
+		errno = EINVAL;
+		return -1;
+	}
+
+	for (;;)
+	{
+		/* Write as much as we can. */
+		part = pg_pwritev(fd, iov, iovcnt, offset);
+		if (part < 0)
+			return -1;
+
+#ifdef SIMULATE_SHORT_WRITE
+		part = Min(part, 4096);
+#endif
+
+		/* Count our progress. */
+		sum += part;
+		offset += part;
+
+		/* Step over iovecs that are done. */
+		while (iovcnt > 0 && iov->iov_len <= part)
+		{
+			part -= iov->iov_len;
+			++iov;
+			--iovcnt;
+		}
+
+		/* Are they all done? */
+		if (iovcnt == 0)
+		{
+			if (part > 0)
+				elog(ERROR, "unexpectedly wrote more than requested");
+			break;
+		}
+
+		/*
+		 * Move whatever's left to the front of our mutable copy and adjust the
+		 * leading iovec.
+		 */
+		Assert(iovcnt > 0);
+		memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
+		Assert(iov->iov_len > part);
+		iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
+		iov_copy[0].iov_len -= part;
+		iov = iov_copy;
+	}
+
+	return sum;
+}