diff options
Diffstat (limited to 'src/backend/storage/file/fd.c')
-rw-r--r-- | src/backend/storage/file/fd.c | 1336 |
1 files changed, 0 insertions, 1336 deletions
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c deleted file mode 100644 index 391a078e602..00000000000 --- a/src/backend/storage/file/fd.c +++ /dev/null @@ -1,1336 +0,0 @@ -/*------------------------------------------------------------------------- - * - * fd.c - * Virtual file descriptor code. - * - * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/storage/file/fd.c,v 1.92 2002/06/20 20:29:34 momjian Exp $ - * - * NOTES: - * - * This code manages a cache of 'virtual' file descriptors (VFDs). - * The server opens many file descriptors for a variety of reasons, - * including base tables, scratch files (e.g., sort and hash spool - * files), and random calls to C library routines like system(3); it - * is quite easy to exceed system limits on the number of open files a - * single process can have. (This is around 256 on many modern - * operating systems, but can be as low as 32 on others.) - * - * VFDs are managed as an LRU pool, with actual OS file descriptors - * being opened and closed as needed. Obviously, if a routine is - * opened using these interfaces, all subsequent operations must also - * be through these interfaces (the File type is not a real file - * descriptor). - * - * For this scheme to work, most (if not all) routines throughout the - * server should use these interfaces instead of calling the C library - * routines (e.g., open(2) and fopen(3)) themselves. Otherwise, we - * may find ourselves short of real file descriptors anyway. - * - * This file used to contain a bunch of stuff to support RAID levels 0 - * (jbod), 1 (duplex) and 5 (xor parity). That stuff is all gone - * because the parallel query processing code that called it is all - * gone. If you really need it you could get it from the original - * POSTGRES source. - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include <sys/types.h> -#include <sys/file.h> -#include <sys/param.h> -#include <sys/stat.h> -#include <dirent.h> -#include <errno.h> -#include <unistd.h> -#include <fcntl.h> - -#include "miscadmin.h" -#include "storage/fd.h" -#include "storage/ipc.h" - - -/* Filename components for OpenTemporaryFile */ -#define PG_TEMP_FILES_DIR "pgsql_tmp" -#define PG_TEMP_FILE_PREFIX "pgsql_tmp" - - -/* - * Problem: Postgres does a system(ld...) to do dynamic loading. - * This will open several extra files in addition to those used by - * Postgres. We need to guarantee that there are file descriptors free - * for ld to use. - * - * The current solution is to limit the number of file descriptors - * that this code will allocate at one time: it leaves RESERVE_FOR_LD free. - * - * (Even though most dynamic loaders now use dlopen(3) or the - * equivalent, the OS must still open several files to perform the - * dynamic loading. And stdin/stdout/stderr count too. Keep this here.) - */ -#ifndef RESERVE_FOR_LD -#define RESERVE_FOR_LD 10 -#endif - -/* - * We need to ensure that we have at least some file descriptors - * available to postgreSQL after we've reserved the ones for LD, - * so we set that value here. - * - * I think 10 is an appropriate value so that's what it'll be - * for now. - */ -#ifndef FD_MINFREE -#define FD_MINFREE 10 -#endif - -/* - * A number of platforms return values for sysconf(_SC_OPEN_MAX) that are - * far beyond what they can really support. This GUC parameter limits what - * we will believe. - */ -int max_files_per_process = 1000; - - -/* Debugging.... */ - -#ifdef FDDEBUG -#define DO_DB(A) A -#else -#define DO_DB(A) /* A */ -#endif - -#define VFD_CLOSED (-1) - -#define FileIsValid(file) \ - ((file) > 0 && (file) < (int) SizeVfdCache && VfdCache[file].fileName != NULL) - -#define FileIsNotOpen(file) (VfdCache[file].fd == VFD_CLOSED) - -#define FileUnknownPos (-1L) - -typedef struct vfd -{ - signed short fd; /* current FD, or VFD_CLOSED if none */ - unsigned short fdstate; /* bitflags for VFD's state */ - -/* these are the assigned bits in fdstate: */ -#define FD_DIRTY (1 << 0) /* written to, but not yet fsync'd */ -#define FD_TEMPORARY (1 << 1) /* should be unlinked when closed */ - - File nextFree; /* link to next free VFD, if in freelist */ - File lruMoreRecently; /* doubly linked recency-of-use list */ - File lruLessRecently; - long seekPos; /* current logical file position */ - char *fileName; /* name of file, or NULL for unused VFD */ - /* NB: fileName is malloc'd, and must be free'd when closing the VFD */ - int fileFlags; /* open(2) flags for (re)opening the file */ - int fileMode; /* mode to pass to open(2) */ -} Vfd; - -/* - * Virtual File Descriptor array pointer and size. This grows as - * needed. 'File' values are indexes into this array. - * Note that VfdCache[0] is not a usable VFD, just a list header. - */ -static Vfd *VfdCache; -static Size SizeVfdCache = 0; - -/* - * Number of file descriptors known to be in use by VFD entries. - */ -static int nfile = 0; - -/* - * List of stdio FILEs opened with AllocateFile. - * - * Since we don't want to encourage heavy use of AllocateFile, it seems - * OK to put a pretty small maximum limit on the number of simultaneously - * allocated files. - */ -#define MAX_ALLOCATED_FILES 32 - -static int numAllocatedFiles = 0; -static FILE *allocatedFiles[MAX_ALLOCATED_FILES]; - -/* - * Number of temporary files opened during the current transaction; - * this is used in generation of tempfile names. - */ -static long tempFileCounter = 0; - - -/*-------------------- - * - * Private Routines - * - * Delete - delete a file from the Lru ring - * LruDelete - remove a file from the Lru ring and close its FD - * Insert - put a file at the front of the Lru ring - * LruInsert - put a file at the front of the Lru ring and open it - * ReleaseLruFile - Release an fd by closing the last entry in the Lru ring - * AllocateVfd - grab a free (or new) file record (from VfdArray) - * FreeVfd - free a file record - * - * The Least Recently Used ring is a doubly linked list that begins and - * ends on element zero. Element zero is special -- it doesn't represent - * a file and its "fd" field always == VFD_CLOSED. Element zero is just an - * anchor that shows us the beginning/end of the ring. - * Only VFD elements that are currently really open (have an FD assigned) are - * in the Lru ring. Elements that are "virtually" open can be recognized - * by having a non-null fileName field. - * - * example: - * - * /--less----\ /---------\ - * v \ v \ - * #0 --more---> LeastRecentlyUsed --more-\ \ - * ^\ | | - * \\less--> MostRecentlyUsedFile <---/ | - * \more---/ \--less--/ - * - *-------------------- - */ -static void Delete(File file); -static void LruDelete(File file); -static void Insert(File file); -static int LruInsert(File file); -static bool ReleaseLruFile(void); -static File AllocateVfd(void); -static void FreeVfd(File file); - -static int FileAccess(File file); -static File fileNameOpenFile(FileName fileName, int fileFlags, int fileMode); -static char *filepath(const char *filename); -static long pg_nofile(void); - -/* - * pg_fsync --- same as fsync except does nothing if enableFsync is off - */ -int -pg_fsync(int fd) -{ - if (enableFsync) - return fsync(fd); - else - return 0; -} - -/* - * pg_fdatasync --- same as fdatasync except does nothing if enableFsync is off - * - * Not all platforms have fdatasync; treat as fsync if not available. - */ -int -pg_fdatasync(int fd) -{ - if (enableFsync) - { -#ifdef HAVE_FDATASYNC - return fdatasync(fd); -#else - return fsync(fd); -#endif - } - else - return 0; -} - -/* - * BasicOpenFile --- same as open(2) except can free other FDs if needed - * - * This is exported for use by places that really want a plain kernel FD, - * but need to be proof against running out of FDs. Once an FD has been - * successfully returned, it is the caller's responsibility to ensure that - * it will not be leaked on elog()! Most users should *not* call this - * routine directly, but instead use the VFD abstraction level, which - * provides protection against descriptor leaks as well as management of - * files that need to be open for more than a short period of time. - * - * Ideally this should be the *only* direct call of open() in the backend. - * In practice, the postmaster calls open() directly, and there are some - * direct open() calls done early in backend startup. Those are OK since - * this module wouldn't have any open files to close at that point anyway. - */ -int -BasicOpenFile(FileName fileName, int fileFlags, int fileMode) -{ - int fd; - -tryAgain: - fd = open(fileName, fileFlags, fileMode); - - if (fd >= 0) - return fd; /* success! */ - - if (errno == EMFILE || errno == ENFILE) - { - int save_errno = errno; - - DO_DB(elog(LOG, "BasicOpenFile: not enough descs, retry, er= %d", - errno)); - errno = 0; - if (ReleaseLruFile()) - goto tryAgain; - errno = save_errno; - } - - return -1; /* failure */ -} - -/* - * pg_nofile: determine number of filedescriptors that fd.c is allowed to use - */ -static long -pg_nofile(void) -{ - static long no_files = 0; - - /* need do this calculation only once */ - if (no_files == 0) - { - /* - * Ask the system what its files-per-process limit is. - */ -#ifdef HAVE_SYSCONF - no_files = sysconf(_SC_OPEN_MAX); - if (no_files <= 0) - { -#ifdef NOFILE - no_files = (long) NOFILE; -#else - no_files = (long) max_files_per_process; -#endif - elog(LOG, "pg_nofile: sysconf(_SC_OPEN_MAX) failed; using %ld", - no_files); - } -#else /* !HAVE_SYSCONF */ -#ifdef NOFILE - no_files = (long) NOFILE; -#else - no_files = (long) max_files_per_process; -#endif -#endif /* HAVE_SYSCONF */ - - /* - * Some platforms return hopelessly optimistic values. Apply a - * configurable upper limit. - */ - if (no_files > (long) max_files_per_process) - no_files = (long) max_files_per_process; - - /* - * Make sure we have enough to get by after reserving some for LD. - */ - if ((no_files - RESERVE_FOR_LD) < FD_MINFREE) - elog(FATAL, "pg_nofile: insufficient file descriptors available to start backend.\n" - "\tSystem allows %ld, we need at least %d.", - no_files, RESERVE_FOR_LD + FD_MINFREE); - - no_files -= RESERVE_FOR_LD; - } - - return no_files; -} - -#if defined(FDDEBUG) - -static void -_dump_lru(void) -{ - int mru = VfdCache[0].lruLessRecently; - Vfd *vfdP = &VfdCache[mru]; - char buf[2048]; - - sprintf(buf, "LRU: MOST %d ", mru); - while (mru != 0) - { - mru = vfdP->lruLessRecently; - vfdP = &VfdCache[mru]; - sprintf(buf + strlen(buf), "%d ", mru); - } - sprintf(buf + strlen(buf), "LEAST"); - elog(LOG, buf); -} -#endif /* FDDEBUG */ - -static void -Delete(File file) -{ - Vfd *vfdP; - - Assert(file != 0); - - DO_DB(elog(LOG, "Delete %d (%s)", - file, VfdCache[file].fileName)); - DO_DB(_dump_lru()); - - vfdP = &VfdCache[file]; - - VfdCache[vfdP->lruLessRecently].lruMoreRecently = vfdP->lruMoreRecently; - VfdCache[vfdP->lruMoreRecently].lruLessRecently = vfdP->lruLessRecently; - - DO_DB(_dump_lru()); -} - -static void -LruDelete(File file) -{ - Vfd *vfdP; - - Assert(file != 0); - - DO_DB(elog(LOG, "LruDelete %d (%s)", - file, VfdCache[file].fileName)); - - vfdP = &VfdCache[file]; - - /* delete the vfd record from the LRU ring */ - Delete(file); - - /* save the seek position */ - vfdP->seekPos = (long) lseek(vfdP->fd, 0L, SEEK_CUR); - Assert(vfdP->seekPos != -1L); - - /* if we have written to the file, sync it before closing */ - if (vfdP->fdstate & FD_DIRTY) - { - if (pg_fsync(vfdP->fd)) - elog(LOG, "LruDelete: failed to fsync %s: %m", - vfdP->fileName); - vfdP->fdstate &= ~FD_DIRTY; - } - - /* close the file */ - if (close(vfdP->fd)) - elog(LOG, "LruDelete: failed to close %s: %m", - vfdP->fileName); - - --nfile; - vfdP->fd = VFD_CLOSED; -} - -static void -Insert(File file) -{ - Vfd *vfdP; - - Assert(file != 0); - - DO_DB(elog(LOG, "Insert %d (%s)", - file, VfdCache[file].fileName)); - DO_DB(_dump_lru()); - - vfdP = &VfdCache[file]; - - vfdP->lruMoreRecently = 0; - vfdP->lruLessRecently = VfdCache[0].lruLessRecently; - VfdCache[0].lruLessRecently = file; - VfdCache[vfdP->lruLessRecently].lruMoreRecently = file; - - DO_DB(_dump_lru()); -} - -static int -LruInsert(File file) -{ - Vfd *vfdP; - - Assert(file != 0); - - DO_DB(elog(LOG, "LruInsert %d (%s)", - file, VfdCache[file].fileName)); - - vfdP = &VfdCache[file]; - - if (FileIsNotOpen(file)) - { - while (nfile + numAllocatedFiles >= pg_nofile()) - { - if (!ReleaseLruFile()) - break; - } - - /* - * The open could still fail for lack of file descriptors, eg due - * to overall system file table being full. So, be prepared to - * release another FD if necessary... - */ - vfdP->fd = BasicOpenFile(vfdP->fileName, vfdP->fileFlags, - vfdP->fileMode); - if (vfdP->fd < 0) - { - DO_DB(elog(LOG, "RE_OPEN FAILED: %d", errno)); - return vfdP->fd; - } - else - { - DO_DB(elog(LOG, "RE_OPEN SUCCESS")); - ++nfile; - } - - /* seek to the right position */ - if (vfdP->seekPos != 0L) - { - long returnValue; - - returnValue = (long) lseek(vfdP->fd, vfdP->seekPos, SEEK_SET); - Assert(returnValue != -1L); - } - } - - /* - * put it at the head of the Lru ring - */ - - Insert(file); - - return 0; -} - -static bool -ReleaseLruFile(void) -{ - DO_DB(elog(LOG, "ReleaseLruFile. Opened %d", nfile)); - - if (nfile > 0) - { - /* - * There are opened files and so there should be at least one used - * vfd in the ring. - */ - Assert(VfdCache[0].lruMoreRecently != 0); - LruDelete(VfdCache[0].lruMoreRecently); - return true; /* freed a file */ - } - return false; /* no files available to free */ -} - -static File -AllocateVfd(void) -{ - Index i; - File file; - - DO_DB(elog(LOG, "AllocateVfd. Size %d", SizeVfdCache)); - - if (SizeVfdCache == 0) - { - /* initialize header entry first time through */ - VfdCache = (Vfd *) malloc(sizeof(Vfd)); - if (VfdCache == NULL) - elog(FATAL, "AllocateVfd: no room for VFD array"); - MemSet((char *) &(VfdCache[0]), 0, sizeof(Vfd)); - VfdCache->fd = VFD_CLOSED; - - SizeVfdCache = 1; - - /* - * register proc-exit call to ensure temp files are dropped at - * exit - */ - on_proc_exit(AtEOXact_Files, 0); - } - - if (VfdCache[0].nextFree == 0) - { - /* - * The free list is empty so it is time to increase the size of - * the array. We choose to double it each time this happens. - * However, there's not much point in starting *real* small. - */ - Size newCacheSize = SizeVfdCache * 2; - Vfd *newVfdCache; - - if (newCacheSize < 32) - newCacheSize = 32; - - /* - * Be careful not to clobber VfdCache ptr if realloc fails; we - * will need it during proc_exit cleanup! - */ - newVfdCache = (Vfd *) realloc(VfdCache, sizeof(Vfd) * newCacheSize); - if (newVfdCache == NULL) - elog(FATAL, "AllocateVfd: no room to enlarge VFD array"); - VfdCache = newVfdCache; - - /* - * Initialize the new entries and link them into the free list. - */ - for (i = SizeVfdCache; i < newCacheSize; i++) - { - MemSet((char *) &(VfdCache[i]), 0, sizeof(Vfd)); - VfdCache[i].nextFree = i + 1; - VfdCache[i].fd = VFD_CLOSED; - } - VfdCache[newCacheSize - 1].nextFree = 0; - VfdCache[0].nextFree = SizeVfdCache; - - /* - * Record the new size - */ - SizeVfdCache = newCacheSize; - } - - file = VfdCache[0].nextFree; - - VfdCache[0].nextFree = VfdCache[file].nextFree; - - return file; -} - -static void -FreeVfd(File file) -{ - Vfd *vfdP = &VfdCache[file]; - - DO_DB(elog(LOG, "FreeVfd: %d (%s)", - file, vfdP->fileName ? vfdP->fileName : "")); - - if (vfdP->fileName != NULL) - { - free(vfdP->fileName); - vfdP->fileName = NULL; - } - vfdP->fdstate = 0x0; - - vfdP->nextFree = VfdCache[0].nextFree; - VfdCache[0].nextFree = file; -} - -/* filepath() - * Convert given pathname to absolute. - * - * Result is a palloc'd string. - * - * (Generally, this isn't actually necessary, considering that we - * should be cd'd into the database directory. Presently it is only - * necessary to do it in "bootstrap" mode. Maybe we should change - * bootstrap mode to do the cd, and save a few cycles/bytes here.) - */ -static char * -filepath(const char *filename) -{ - char *buf; - - /* Not an absolute path name? Then fill in with database path... */ - if (*filename != '/') - { - buf = (char *) palloc(strlen(DatabasePath) + strlen(filename) + 2); - sprintf(buf, "%s/%s", DatabasePath, filename); - } - else - buf = pstrdup(filename); - -#ifdef FILEDEBUG - printf("filepath: path is %s\n", buf); -#endif - - return buf; -} - -static int -FileAccess(File file) -{ - int returnValue; - - DO_DB(elog(LOG, "FileAccess %d (%s)", - file, VfdCache[file].fileName)); - - /* - * Is the file open? If not, open it and put it at the head of the - * LRU ring (possibly closing the least recently used file to get an - * FD). - */ - - if (FileIsNotOpen(file)) - { - returnValue = LruInsert(file); - if (returnValue != 0) - return returnValue; - } - else if (VfdCache[0].lruLessRecently != file) - { - /* - * We now know that the file is open and that it is not the last - * one accessed, so we need to move it to the head of the Lru - * ring. - */ - - Delete(file); - Insert(file); - } - - return 0; -} - -/* - * Called when we get a shared invalidation message on some relation. - */ -#ifdef NOT_USED -void -FileInvalidate(File file) -{ - Assert(FileIsValid(file)); - if (!FileIsNotOpen(file)) - LruDelete(file); -} -#endif - -static File -fileNameOpenFile(FileName fileName, - int fileFlags, - int fileMode) -{ - File file; - Vfd *vfdP; - - if (fileName == NULL) - elog(ERROR, "fileNameOpenFile: NULL fname"); - - DO_DB(elog(LOG, "fileNameOpenFile: %s %x %o", - fileName, fileFlags, fileMode)); - - file = AllocateVfd(); - vfdP = &VfdCache[file]; - - while (nfile + numAllocatedFiles >= pg_nofile()) - { - if (!ReleaseLruFile()) - break; - } - - vfdP->fd = BasicOpenFile(fileName, fileFlags, fileMode); - - if (vfdP->fd < 0) - { - FreeVfd(file); - return -1; - } - ++nfile; - DO_DB(elog(LOG, "fileNameOpenFile: success %d", - vfdP->fd)); - - Insert(file); - - vfdP->fileName = (char *) malloc(strlen(fileName) + 1); - if (vfdP->fileName == NULL) - elog(FATAL, "fileNameOpenFile: no room to save VFD filename"); - strcpy(vfdP->fileName, fileName); - - /* Saved flags are adjusted to be OK for re-opening file */ - vfdP->fileFlags = fileFlags & ~(O_CREAT | O_TRUNC | O_EXCL); - vfdP->fileMode = fileMode; - - vfdP->seekPos = 0; - - /* - * Have to fsync file on commit. Alternative way - log file creation - * and fsync log before actual file creation. - */ - if (fileFlags & O_CREAT) - vfdP->fdstate = FD_DIRTY; - else - vfdP->fdstate = 0x0; - - return file; -} - -/* - * open a file in the database directory ($PGDATA/base/...) - */ -File -FileNameOpenFile(FileName fileName, int fileFlags, int fileMode) -{ - File fd; - char *fname; - - fname = filepath(fileName); - fd = fileNameOpenFile(fname, fileFlags, fileMode); - pfree(fname); - return fd; -} - -/* - * open a file in an arbitrary directory - */ -File -PathNameOpenFile(FileName fileName, int fileFlags, int fileMode) -{ - return fileNameOpenFile(fileName, fileFlags, fileMode); -} - -/* - * Open a temporary file that will disappear when we close it. - * - * This routine takes care of generating an appropriate tempfile name. - * There's no need to pass in fileFlags or fileMode either, since only - * one setting makes any sense for a temp file. - */ -File -OpenTemporaryFile(void) -{ - char tempfilepath[128]; - File file; - - /* - * Generate a tempfile name that's unique within the current - * transaction and database instance. - */ - snprintf(tempfilepath, sizeof(tempfilepath), - "%s/%s%d.%ld", PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX, - MyProcPid, tempFileCounter++); - - /* - * Open the file. Note: we don't use O_EXCL, in case there is an - * orphaned temp file that can be reused. - */ - file = FileNameOpenFile(tempfilepath, - O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, - 0600); - if (file <= 0) - { - char *dirpath; - - /* - * We might need to create the pg_tempfiles subdirectory, if no - * one has yet done so. - * - * Don't check for error from mkdir; it could fail if someone else - * just did the same thing. If it doesn't work then we'll bomb - * out on the second create attempt, instead. - */ - dirpath = filepath(PG_TEMP_FILES_DIR); - mkdir(dirpath, S_IRWXU); - pfree(dirpath); - - file = FileNameOpenFile(tempfilepath, - O_RDWR | O_CREAT | O_TRUNC | PG_BINARY, - 0600); - if (file <= 0) - elog(ERROR, "Failed to create temporary file %s", tempfilepath); - } - - /* Mark it for deletion at close or EOXact */ - VfdCache[file].fdstate |= FD_TEMPORARY; - - return file; -} - -/* - * close a file when done with it - */ -void -FileClose(File file) -{ - Vfd *vfdP; - - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileClose: %d (%s)", - file, VfdCache[file].fileName)); - - vfdP = &VfdCache[file]; - - if (!FileIsNotOpen(file)) - { - /* remove the file from the lru ring */ - Delete(file); - - /* if we did any writes, sync the file before closing */ - if (vfdP->fdstate & FD_DIRTY) - { - if (pg_fsync(vfdP->fd)) - elog(LOG, "FileClose: failed to fsync %s: %m", - vfdP->fileName); - vfdP->fdstate &= ~FD_DIRTY; - } - - /* close the file */ - if (close(vfdP->fd)) - elog(LOG, "FileClose: failed to close %s: %m", - vfdP->fileName); - - --nfile; - vfdP->fd = VFD_CLOSED; - } - - /* - * Delete the file if it was temporary - */ - if (vfdP->fdstate & FD_TEMPORARY) - { - /* reset flag so that die() interrupt won't cause problems */ - vfdP->fdstate &= ~FD_TEMPORARY; - if (unlink(vfdP->fileName)) - elog(LOG, "FileClose: failed to unlink %s: %m", - vfdP->fileName); - } - - /* - * Return the Vfd slot to the free list - */ - FreeVfd(file); -} - -/* - * close a file and forcibly delete the underlying Unix file - */ -void -FileUnlink(File file) -{ - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileUnlink: %d (%s)", - file, VfdCache[file].fileName)); - - /* force FileClose to delete it */ - VfdCache[file].fdstate |= FD_TEMPORARY; - - FileClose(file); -} - -int -FileRead(File file, char *buffer, int amount) -{ - int returnCode; - - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileRead: %d (%s) %ld %d %p", - file, VfdCache[file].fileName, - VfdCache[file].seekPos, amount, buffer)); - - FileAccess(file); - returnCode = read(VfdCache[file].fd, buffer, amount); - if (returnCode > 0) - VfdCache[file].seekPos += returnCode; - else - VfdCache[file].seekPos = FileUnknownPos; - - return returnCode; -} - -int -FileWrite(File file, char *buffer, int amount) -{ - int returnCode; - - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileWrite: %d (%s) %ld %d %p", - file, VfdCache[file].fileName, - VfdCache[file].seekPos, amount, buffer)); - - FileAccess(file); - - errno = 0; - returnCode = write(VfdCache[file].fd, buffer, amount); - - /* if write didn't set errno, assume problem is no disk space */ - if (returnCode != amount && errno == 0) - errno = ENOSPC; - - if (returnCode > 0) - VfdCache[file].seekPos += returnCode; - else - VfdCache[file].seekPos = FileUnknownPos; - - return returnCode; -} - -long -FileSeek(File file, long offset, int whence) -{ - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileSeek: %d (%s) %ld %ld %d", - file, VfdCache[file].fileName, - VfdCache[file].seekPos, offset, whence)); - - if (FileIsNotOpen(file)) - { - switch (whence) - { - case SEEK_SET: - if (offset < 0) - elog(ERROR, "FileSeek: invalid offset: %ld", offset); - VfdCache[file].seekPos = offset; - break; - case SEEK_CUR: - VfdCache[file].seekPos += offset; - break; - case SEEK_END: - FileAccess(file); - VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence); - break; - default: - elog(ERROR, "FileSeek: invalid whence: %d", whence); - break; - } - } - else - { - switch (whence) - { - case SEEK_SET: - if (offset < 0) - elog(ERROR, "FileSeek: invalid offset: %ld", offset); - if (VfdCache[file].seekPos != offset) - VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence); - break; - case SEEK_CUR: - if (offset != 0 || VfdCache[file].seekPos == FileUnknownPos) - VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence); - break; - case SEEK_END: - VfdCache[file].seekPos = lseek(VfdCache[file].fd, offset, whence); - break; - default: - elog(ERROR, "FileSeek: invalid whence: %d", whence); - break; - } - } - return VfdCache[file].seekPos; -} - -/* - * XXX not actually used but here for completeness - */ -#ifdef NOT_USED -long -FileTell(File file) -{ - Assert(FileIsValid(file)); - DO_DB(elog(LOG, "FileTell %d (%s)", - file, VfdCache[file].fileName)); - return VfdCache[file].seekPos; -} -#endif - -int -FileTruncate(File file, long offset) -{ - int returnCode; - - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileTruncate %d (%s)", - file, VfdCache[file].fileName)); - - FileSync(file); - FileAccess(file); - returnCode = ftruncate(VfdCache[file].fd, (size_t) offset); - return returnCode; -} - -/* - * FileSync --- if a file is marked as dirty, fsync it. - * - * The FD_DIRTY bit is slightly misnamed: it doesn't mean that we need to - * write the file, but that we *have* written it and need to execute an - * fsync() to ensure the changes are down on disk before we mark the current - * transaction committed. - * - * FD_DIRTY is set by FileWrite or by an explicit FileMarkDirty() call. - * It is cleared after successfully fsync'ing the file. FileClose() will - * fsync a dirty File that is about to be closed, since there will be no - * other place to remember the need to fsync after the VFD is gone. - * - * Note that the DIRTY bit is logically associated with the actual disk file, - * not with any particular kernel FD we might have open for it. We assume - * that fsync will force out any dirty buffers for that file, whether or not - * they were written through the FD being used for the fsync call --- they - * might even have been written by some other backend! - * - * Note also that LruDelete currently fsyncs a dirty file that it is about - * to close the kernel file descriptor for. The idea there is to avoid - * having to re-open the kernel descriptor later. But it's not real clear - * that this is a performance win; we could end up fsyncing the same file - * multiple times in a transaction, which would probably cost more time - * than is saved by avoiding an open() call. This should be studied. - * - * This routine used to think it could skip the fsync if the file is - * physically closed, but that is now WRONG; see comments for FileMarkDirty. - */ -int -FileSync(File file) -{ - int returnCode; - - Assert(FileIsValid(file)); - - if (!(VfdCache[file].fdstate & FD_DIRTY)) - { - /* Need not sync if file is not dirty. */ - returnCode = 0; - } - else if (!enableFsync) - { - /* Don't force the file open if pg_fsync isn't gonna sync it. */ - returnCode = 0; - VfdCache[file].fdstate &= ~FD_DIRTY; - } - else - { - /* - * We don't use FileAccess() because we don't want to force the - * file to the front of the LRU ring; we aren't expecting to - * access it again soon. - */ - if (FileIsNotOpen(file)) - { - returnCode = LruInsert(file); - if (returnCode != 0) - return returnCode; - } - returnCode = pg_fsync(VfdCache[file].fd); - if (returnCode == 0) - VfdCache[file].fdstate &= ~FD_DIRTY; - } - - return returnCode; -} - -/* - * FileMarkDirty --- mark a file as needing fsync at transaction commit. - * - * Since FileWrite marks the file dirty, this routine is not needed in - * normal use. It is called when the buffer manager detects that some other - * backend has written out a shared buffer that this backend dirtied (but - * didn't write) in the current xact. In that scenario, we need to fsync - * the file before we can commit. We cannot assume that the other backend - * has fsync'd the file yet; we need to do our own fsync to ensure that - * (a) the disk page is written and (b) this backend's commit is delayed - * until the write is complete. - * - * Note we are assuming that an fsync issued by this backend will write - * kernel disk buffers that were dirtied by another backend. Furthermore, - * it doesn't matter whether we currently have the file physically open; - * we must fsync even if we have to re-open the file to do it. - */ -void -FileMarkDirty(File file) -{ - Assert(FileIsValid(file)); - - DO_DB(elog(LOG, "FileMarkDirty: %d (%s)", - file, VfdCache[file].fileName)); - - VfdCache[file].fdstate |= FD_DIRTY; -} - - -/* - * Routines that want to use stdio (ie, FILE*) should use AllocateFile - * rather than plain fopen(). This lets fd.c deal with freeing FDs if - * necessary to open the file. When done, call FreeFile rather than fclose. - * - * Note that files that will be open for any significant length of time - * should NOT be handled this way, since they cannot share kernel file - * descriptors with other files; there is grave risk of running out of FDs - * if anyone locks down too many FDs. Most callers of this routine are - * simply reading a config file that they will read and close immediately. - * - * fd.c will automatically close all files opened with AllocateFile at - * transaction commit or abort; this prevents FD leakage if a routine - * that calls AllocateFile is terminated prematurely by elog(ERROR). - * - * Ideally this should be the *only* direct call of fopen() in the backend. - */ - -FILE * -AllocateFile(char *name, char *mode) -{ - FILE *file; - - DO_DB(elog(LOG, "AllocateFile: Allocated %d", numAllocatedFiles)); - - if (numAllocatedFiles >= MAX_ALLOCATED_FILES) - elog(ERROR, "AllocateFile: too many private FDs demanded"); - -TryAgain: - if ((file = fopen(name, mode)) != NULL) - { - allocatedFiles[numAllocatedFiles] = file; - numAllocatedFiles++; - return file; - } - - if (errno == EMFILE || errno == ENFILE) - { - int save_errno = errno; - - DO_DB(elog(LOG, "AllocateFile: not enough descs, retry, er= %d", - errno)); - errno = 0; - if (ReleaseLruFile()) - goto TryAgain; - errno = save_errno; - } - - return NULL; -} - -void -FreeFile(FILE *file) -{ - int i; - - DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedFiles)); - - /* Remove file from list of allocated files, if it's present */ - for (i = numAllocatedFiles; --i >= 0;) - { - if (allocatedFiles[i] == file) - { - numAllocatedFiles--; - allocatedFiles[i] = allocatedFiles[numAllocatedFiles]; - break; - } - } - if (i < 0) - elog(WARNING, "FreeFile: file was not obtained from AllocateFile"); - - fclose(file); -} - -/* - * closeAllVfds - * - * Force all VFDs into the physically-closed state, so that the fewest - * possible number of kernel file descriptors are in use. There is no - * change in the logical state of the VFDs. - */ -void -closeAllVfds(void) -{ - Index i; - - if (SizeVfdCache > 0) - { - Assert(FileIsNotOpen(0)); /* Make sure ring not corrupted */ - for (i = 1; i < SizeVfdCache; i++) - { - if (!FileIsNotOpen(i)) - LruDelete(i); - } - } -} - -/* - * AtEOXact_Files - * - * This routine is called during transaction commit or abort or backend - * exit (it doesn't particularly care which). All still-open temporary-file - * VFDs are closed, which also causes the underlying files to be deleted. - * Furthermore, all "allocated" stdio files are closed. - * - * This routine is not involved in fsync'ing non-temporary files at xact - * commit; that is done by FileSync under control of the buffer manager. - * During a commit, that is done *before* control gets here. If we still - * have any needs-fsync bits set when we get here, we assume this is abort - * and clear them. - */ -void -AtEOXact_Files(void) -{ - Index i; - - if (SizeVfdCache > 0) - { - Assert(FileIsNotOpen(0)); /* Make sure ring not corrupted */ - for (i = 1; i < SizeVfdCache; i++) - { - if ((VfdCache[i].fdstate & FD_TEMPORARY) && - VfdCache[i].fileName != NULL) - FileClose(i); - else - VfdCache[i].fdstate &= ~FD_DIRTY; - } - } - - while (numAllocatedFiles > 0) - FreeFile(allocatedFiles[0]); - - /* - * Reset the tempfile name counter to 0; not really necessary, but - * helps keep the names from growing unreasonably long. - */ - tempFileCounter = 0; -} - - -/* - * Remove old temporary files - * - * This should be called during postmaster startup. It will forcibly - * remove any leftover files created by OpenTemporaryFile. - */ -void -RemovePgTempFiles(void) -{ - char db_path[MAXPGPATH]; - char temp_path[MAXPGPATH]; - char rm_path[MAXPGPATH]; - DIR *db_dir; - DIR *temp_dir; - struct dirent *db_de; - struct dirent *temp_de; - - /* - * Cycle through pg_tempfiles for all databases and remove old temp - * files. - */ - snprintf(db_path, sizeof(db_path), "%s/base", DataDir); - if ((db_dir = opendir(db_path)) != NULL) - { - while ((db_de = readdir(db_dir)) != NULL) - { - if (strcmp(db_de->d_name, ".") == 0 || - strcmp(db_de->d_name, "..") == 0) - continue; - - snprintf(temp_path, sizeof(temp_path), - "%s/%s/%s", - db_path, db_de->d_name, - PG_TEMP_FILES_DIR); - if ((temp_dir = opendir(temp_path)) != NULL) - { - while ((temp_de = readdir(temp_dir)) != NULL) - { - if (strcmp(temp_de->d_name, ".") == 0 || - strcmp(temp_de->d_name, "..") == 0) - continue; - - snprintf(rm_path, sizeof(temp_path), - "%s/%s/%s/%s", - db_path, db_de->d_name, - PG_TEMP_FILES_DIR, - temp_de->d_name); - - if (strncmp(temp_de->d_name, - PG_TEMP_FILE_PREFIX, - strlen(PG_TEMP_FILE_PREFIX)) == 0) - unlink(rm_path); - else - { - /* - * would prefer to use elog here, but it's not up - * and running during postmaster startup... - */ - fprintf(stderr, - "Unexpected file found in temporary-files directory: %s\n", - rm_path); - } - } - closedir(temp_dir); - } - } - closedir(db_dir); - } -} |