1 files changed, 0 insertions, 1104 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
deleted file mode 100644
index 978d85d4868..00000000000
--- a/src/backend/storage/smgr/md.c
+++ /dev/null
@@ -1,1104 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * md.c
- *	  This code manages relations that reside on magnetic disk.
- *
- * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- *
- * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.91 2002/06/20 20:29:35 momjian Exp $
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <errno.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/file.h>
-
-#include "catalog/catalog.h"
-#include "miscadmin.h"
-#include "storage/smgr.h"
-#include "utils/inval.h"
-#include "utils/memutils.h"
-
-
-#undef DIAGNOSTIC
-
-/*
- *	The magnetic disk storage manager keeps track of open file descriptors
- *	in its own descriptor pool.  This happens for two reasons.	First, at
- *	transaction boundaries, we walk the list of descriptors and flush
- *	anything that we've dirtied in the current transaction.  Second, we want
- *	to support relations larger than the OS' file size limit (often 2GBytes).
- *	In order to do that, we break relations up into chunks of < 2GBytes
- *	and store one chunk in each of several files that represent the relation.
- *	See the BLCKSZ and RELSEG_SIZE configuration constants in include/pg_config.h.
- *
- *	The file descriptor stored in the relation cache (see RelationGetFile())
- *	is actually an index into the Md_fdvec array.  -1 indicates not open.
- *
- *	When a relation is broken into multiple chunks, only the first chunk
- *	has its own entry in the Md_fdvec array; the remaining chunks have
- *	palloc'd MdfdVec objects that are chained onto the first chunk via the
- *	mdfd_chain links.  All chunks except the last MUST have size exactly
- *	equal to RELSEG_SIZE blocks --- see mdnblocks() and mdtruncate().
- */
-
-/* Bits assigned within MdfdVec.mdfd_flags: */
-#define MDFD_FREE	(1 << 0)	/* entry is unused (sits on the freelist) */
-
-/*
- *	Bookkeeping entry for one open file.  Unused entries of the Md_fdvec
- *	array are linked together through mdfd_nextFree; when relation
- *	segmenting is compiled in, mdfd_chain leads to the entry for the
- *	following segment of the same relation.
- */
-typedef struct _MdfdVec
-{
-	int			mdfd_vfd;		/* fd number in vfd pool */
-	int			mdfd_flags;		/* status bits, see MDFD_FREE */
-	int			mdfd_nextFree;	/* next freelist member, if this one is free */
-#ifndef LET_OS_MANAGE_FILESIZE
-	struct _MdfdVec *mdfd_chain;	/* next segment, for large relations */
-#endif
-} MdfdVec;
-
-/* Descriptor table state, private to this file */
-static MemoryContext MdCxt;			/* context for all my allocations */
-static MdfdVec *Md_fdvec = NULL;	/* the table itself; allocated by mdinit() */
-static int	Nfds = 100;				/* initial/current size of Md_fdvec array */
-static int	CurFd = 0;				/* first never-used fdvec index */
-static int	Md_Free = -1;			/* head of freelist of unused fdvec
-									 * entries, or -1 if the list is empty */
-
-/* local routines: descriptor table management */
-static int	_fdvec_alloc(void);
-static void _fdvec_free(int fdvec);
-
-/* local routines: relation and segment access */
-static void mdclose_fd(int fd);
-static int	_mdfd_getrelnfd(Relation reln);
-static MdfdVec *_mdfd_getseg(Relation reln, BlockNumber blkno);
-static MdfdVec *_mdfd_openseg(Relation reln, BlockNumber segno, int oflags);
-static int	_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno);
-static BlockNumber _mdnblocks(File file, Size blcksz);
-
-/*
- *	mdinit() -- Initialize private state for magnetic disk storage manager.
- *
- *		Creates the "MdSmgr" memory context under TopMemoryContext,
- *		allocates the Md_fdvec table with Nfds zeroed entries, and threads
- *		every entry onto the freelist headed by Md_Free.
- *
- *		Always returns SM_SUCCESS.
- */
-int
-mdinit(void)
-{
-	Size		tablebytes = Nfds * sizeof(MdfdVec);
-	int			slot;
-
-	MdCxt = AllocSetContextCreate(TopMemoryContext,
-								  "MdSmgr",
-								  ALLOCSET_DEFAULT_MINSIZE,
-								  ALLOCSET_DEFAULT_INITSIZE,
-								  ALLOCSET_DEFAULT_MAXSIZE);
-
-	Md_fdvec = (MdfdVec *) MemoryContextAlloc(MdCxt, tablebytes);
-	MemSet(Md_fdvec, 0, tablebytes);
-
-	/* Chain all slots together; the final slot terminates the list */
-	for (slot = 0; slot < Nfds; slot++)
-	{
-		Md_fdvec[slot].mdfd_flags = MDFD_FREE;
-		Md_fdvec[slot].mdfd_nextFree = (slot < Nfds - 1) ? slot + 1 : -1;
-	}
-	Md_Free = 0;
-
-	return SM_SUCCESS;
-}
-
-/*
- *	mdcreate() -- Create the file for a new relation.
- *
- *		The file named by relpath(reln->rd_node) is created with O_EXCL.
- *		In bootstrap mode only, a file that already exists is opened
- *		instead of being treated as an error.
- *
- *		Returns the index of the relation's new Md_fdvec entry, or -1 on
- *		failure.  When the file cannot be opened, errno is the error from
- *		the create attempt.
- */
-int
-mdcreate(Relation reln)
-{
-	char	   *path;
-	int			fd,
-				vfd;
-
-	Assert(reln->rd_fd < 0);
-
-	path = relpath(reln->rd_node);
-
-	fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
-
-	if (fd < 0)
-	{
-		int			save_errno = errno;
-
-		/*
-		 * During bootstrap, there are cases where a system relation will
-		 * be accessed (by internal backend processes) before the
-		 * bootstrap script nominally creates it.  Therefore, allow the
-		 * file to exist already, but in bootstrap mode only.  (See also
-		 * mdopen)
-		 */
-		if (IsBootstrapProcessingMode())
-			fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
-		if (fd < 0)
-		{
-			pfree(path);
-			/* be sure to return the error reported by create, not open */
-			errno = save_errno;
-			return -1;
-		}
-		errno = 0;
-	}
-
-	pfree(path);
-
-	vfd = _fdvec_alloc();
-	if (vfd < 0)
-	{
-		/* no table slot available: don't leak the file we just opened */
-		FileClose(fd);
-		return -1;
-	}
-
-	Md_fdvec[vfd].mdfd_vfd = fd;
-	Md_fdvec[vfd].mdfd_flags = 0;
-#ifndef LET_OS_MANAGE_FILESIZE
-	Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
-#endif
-
-	return vfd;
-}
-
-/*
- *	mdunlink() -- Unlink a relation.
- *
- *		Removes the file named by relpath(rnode).  When relation
- *		segmenting is compiled in and that succeeds, also removes
- *		"<path>.1", "<path>.2", ... until an unlink fails; ENOENT ends
- *		the scan normally, any other error is reported.
- *
- *		Returns SM_SUCCESS or SM_FAIL.  errno is set to the failing
- *		unlink's error, or to 0 when SM_SUCCESS is returned.
- */
-int
-mdunlink(RelFileNode rnode)
-{
-	char	   *path = relpath(rnode);
-	int			result = SM_SUCCESS;
-	int			unlink_errno = 0;
-
-	/* The base file is the first segment, or the only one if unsegmented */
-	if (unlink(path) < 0)
-	{
-		unlink_errno = errno;
-		result = SM_FAIL;
-	}
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	/* Now remove any additional segments */
-	if (result == SM_SUCCESS)
-	{
-		char	   *segpath = (char *) palloc(strlen(path) + 12);
-		BlockNumber segno = 1;
-
-		for (;;)
-		{
-			sprintf(segpath, "%s.%u", path, segno);
-			if (unlink(segpath) >= 0)
-			{
-				segno++;
-				continue;
-			}
-
-			/* running off the end of the segments shows up as ENOENT */
-			if (errno != ENOENT)
-			{
-				unlink_errno = errno;
-				result = SM_FAIL;
-			}
-			break;
-		}
-		pfree(segpath);
-	}
-#endif
-
-	pfree(path);
-
-	errno = unlink_errno;
-	return result;
-}
-
-/*
- *	mdextend() -- Add a block to the specified relation.
- *
- *		Works like mdwrite(): the block is written at the position that
- *		corresponds to blocknum.  The caller is expected to pass the
- *		current EOF as blocknum, so if only part of the page gets written
- *		the file is truncated back to where the page started.
- *
- *		Returns SM_SUCCESS or SM_FAIL, with errno set as appropriate.
- *
- * Note: the position is passed in rather than recomputed here, since the
- * caller must already have called mdnblocks() to know where the block goes.
- */
-int
-mdextend(Relation reln, BlockNumber blocknum, char *buffer)
-{
-	MdfdVec    *seg;
-	long		offset;
-	int			written;
-
-	seg = _mdfd_getseg(reln, blocknum);
-
-#ifdef LET_OS_MANAGE_FILESIZE
-	offset = (long) (BLCKSZ * (blocknum));
-#else
-	offset = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
-#ifdef DIAGNOSTIC
-	if (offset >= BLCKSZ * RELSEG_SIZE)
-		elog(FATAL, "seekpos too big!");
-#endif
-#endif
-
-	/*
-	 * The caller's mdnblocks() call usually left the file positioned at
-	 * EOF already, but not always: there may be a partial page at the end
-	 * of the file that we want to overwrite with a full one, or bufmgr.c
-	 * may have written another buffer of the same file in the meantime.
-	 * So seek explicitly.
-	 */
-	if (FileSeek(seg->mdfd_vfd, offset, SEEK_SET) != offset)
-		return SM_FAIL;
-
-	written = FileWrite(seg->mdfd_vfd, buffer, BLCKSZ);
-	if (written != BLCKSZ)
-	{
-		if (written > 0)
-		{
-			int			write_errno = errno;
-
-			/* Get rid of the partially-written page, keeping errno */
-			FileTruncate(seg->mdfd_vfd, offset);
-			FileSeek(seg->mdfd_vfd, offset, SEEK_SET);
-			errno = write_errno;
-		}
-		return SM_FAIL;
-	}
-
-#if !defined(LET_OS_MANAGE_FILESIZE) && defined(DIAGNOSTIC)
-	if (_mdnblocks(seg->mdfd_vfd, BLCKSZ) > ((BlockNumber) RELSEG_SIZE))
-		elog(FATAL, "segment too big!");
-#endif
-
-	return SM_SUCCESS;
-}
-
-/*
- *	mdopen() -- Open the specified relation.
- *
- *		Opens the file named by relpath(reln->rd_node) and records it in
- *		a fresh Md_fdvec entry.  In bootstrap mode only, a missing file
- *		is created instead of being treated as an error.
- *
- *		Returns the index of the Md_fdvec entry, or -1 on failure.
- */
-int
-mdopen(Relation reln)
-{
-	char	   *path;
-	int			fd;
-	int			vfd;
-
-	Assert(reln->rd_fd < 0);
-
-	path = relpath(reln->rd_node);
-
-	fd = FileNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
-
-	if (fd < 0)
-	{
-		/*
-		 * During bootstrap, there are cases where a system relation will
-		 * be accessed (by internal backend processes) before the
-		 * bootstrap script nominally creates it.  Therefore, accept
-		 * mdopen() as a substitute for mdcreate() in bootstrap mode only.
-		 * (See mdcreate)
-		 */
-		if (IsBootstrapProcessingMode())
-			fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
-		if (fd < 0)
-		{
-			pfree(path);
-			return -1;
-		}
-	}
-
-	pfree(path);
-
-	vfd = _fdvec_alloc();
-	if (vfd < 0)
-	{
-		/* no table slot available: don't leak the file we just opened */
-		FileClose(fd);
-		return -1;
-	}
-
-	Md_fdvec[vfd].mdfd_vfd = fd;
-	Md_fdvec[vfd].mdfd_flags = 0;
-#ifndef LET_OS_MANAGE_FILESIZE
-	Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
-
-#ifdef DIAGNOSTIC
-	if (_mdnblocks(fd, BLCKSZ) > ((BlockNumber) RELSEG_SIZE))
-		elog(FATAL, "segment too big on relopen!");
-#endif
-#endif
-
-	return vfd;
-}
-
-/*
- *	mdclose() -- Close the specified relation, if it isn't closed already.
- *
- *		The relation's fd vector entry is released as well and may be
- *		handed to another relation afterwards, so reln should be flushed
- *		from the cache once it has been closed.
- *
- *		Always returns SM_SUCCESS.
- */
-int
-mdclose(Relation reln)
-{
-	int			fd = RelationGetFile(reln);
-
-	/* a negative fd means the relation is already closed: nothing to do */
-	if (fd >= 0)
-	{
-		mdclose_fd(fd);
-		reln->rd_fd = -1;
-	}
-
-	return SM_SUCCESS;
-}
-
-/*
- *	mdclose_fd() -- Sync and close every file behind an Md_fdvec entry,
- *		free any chained segment entries, and return the entry itself to
- *		the freelist.
- */
-static void
-mdclose_fd(int fd)
-{
-	MdfdVec    *head = &Md_fdvec[fd];
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	MdfdVec    *seg = head;
-
-	while (seg != (MdfdVec *) NULL)
-	{
-		MdfdVec    *next = seg->mdfd_chain;
-
-		/* skip files that are closed already */
-		if (seg->mdfd_vfd >= 0)
-		{
-			/*
-			 * Sync before closing: we are about to forget that the file
-			 * even exists, so it cannot be reopened at transaction commit
-			 * to force its changes to disk.
-			 */
-			FileSync(seg->mdfd_vfd);
-			FileClose(seg->mdfd_vfd);
-		}
-
-		/* chained entries were palloc'd; the head lives in the array */
-		if (seg != head)
-			pfree(seg);
-		seg = next;
-	}
-
-	head->mdfd_chain = (MdfdVec *) NULL;
-#else
-	if (head != (MdfdVec *) NULL && head->mdfd_vfd >= 0)
-	{
-		/*
-		 * Sync before closing: we are about to forget that the file even
-		 * exists, so it cannot be reopened at transaction commit to force
-		 * its changes to disk.
-		 */
-		FileSync(head->mdfd_vfd);
-		FileClose(head->mdfd_vfd);
-	}
-#endif
-
-	_fdvec_free(fd);
-}
-
-/*
- *	mdread() -- Read the specified block from a relation.
- *
- *		A read that returns no data, or a short read of the block that
- *		mdnblocks() reports as the end of the relation, hands back an
- *		all-zero page rather than an error.
- *
- *		Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mdread(Relation reln, BlockNumber blocknum, char *buffer)
-{
-	MdfdVec    *seg;
-	long		offset;
-	int			got;
-
-	seg = _mdfd_getseg(reln, blocknum);
-
-#ifdef LET_OS_MANAGE_FILESIZE
-	offset = (long) (BLCKSZ * (blocknum));
-#else
-	offset = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
-#ifdef DIAGNOSTIC
-	if (offset >= BLCKSZ * RELSEG_SIZE)
-		elog(FATAL, "seekpos too big!");
-#endif
-#endif
-
-	if (FileSeek(seg->mdfd_vfd, offset, SEEK_SET) != offset)
-		return SM_FAIL;
-
-	got = FileRead(seg->mdfd_vfd, buffer, BLCKSZ);
-	if (got == BLCKSZ)
-		return SM_SUCCESS;
-
-	/*
-	 * At EOF, return zeroes without complaining.  (XXX Is this still
-	 * necessary/a good idea??)
-	 */
-	if (got == 0 ||
-		(got > 0 && mdnblocks(reln) == blocknum))
-	{
-		MemSet(buffer, 0, BLCKSZ);
-		return SM_SUCCESS;
-	}
-
-	return SM_FAIL;
-}
-
-/*
- *	mdwrite() -- Write the supplied block at the appropriate location.
- *
- *		Seeks to the block's offset within its segment file and writes
- *		BLCKSZ bytes from buffer.
- *
- *		Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
-{
-	MdfdVec    *seg;
-	long		offset;
-
-	seg = _mdfd_getseg(reln, blocknum);
-
-#ifdef LET_OS_MANAGE_FILESIZE
-	offset = (long) (BLCKSZ * (blocknum));
-#else
-	offset = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
-#ifdef DIAGNOSTIC
-	if (offset >= BLCKSZ * RELSEG_SIZE)
-		elog(FATAL, "seekpos too big!");
-#endif
-#endif
-
-	if (FileSeek(seg->mdfd_vfd, offset, SEEK_SET) != offset)
-		return SM_FAIL;
-
-	return (FileWrite(seg->mdfd_vfd, buffer, BLCKSZ) == BLCKSZ) ? SM_SUCCESS : SM_FAIL;
-}
-
-/*
- *	mdflush() -- Synchronously write a block to disk.
- *
- *		Same as mdwrite(), except that FileSync() is called on the file
- *		after a successful write and its failure is reported too.
- *
- *		Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mdflush(Relation reln, BlockNumber blocknum, char *buffer)
-{
-	MdfdVec    *seg;
-	long		offset;
-
-	seg = _mdfd_getseg(reln, blocknum);
-
-#ifdef LET_OS_MANAGE_FILESIZE
-	offset = (long) (BLCKSZ * (blocknum));
-#else
-	offset = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
-#ifdef DIAGNOSTIC
-	if (offset >= BLCKSZ * RELSEG_SIZE)
-		elog(FATAL, "seekpos too big!");
-#endif
-#endif
-
-	if (FileSeek(seg->mdfd_vfd, offset, SEEK_SET) != offset)
-		return SM_FAIL;
-
-	/* write the block; sync only if the write went through */
-	if (FileWrite(seg->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
-		return SM_FAIL;
-	if (FileSync(seg->mdfd_vfd) < 0)
-		return SM_FAIL;
-
-	return SM_SUCCESS;
-}
-
-/*
- *	mdblindwrt() -- Write a block to disk blind.
- *
- *		We have to be able to do this using only the name and OID of
- *		the database and relation in which the block belongs.  Otherwise
- *		this is much like mdwrite().  If dofsync is TRUE, then we fsync
- *		the file after a successful write, making it more like mdflush().
- *
- *		The segment file is opened as a plain kernel descriptor by
- *		_mdfd_blind_getseg() and is closed again before returning.
- *		Failures of lseek, write, fsync and close are logged at LOG level.
- *
- *		Returns SM_SUCCESS or SM_FAIL.
- */
-int
-mdblindwrt(RelFileNode rnode,
-		   BlockNumber blkno,
-		   char *buffer,
-		   bool dofsync)
-{
-	int			status;
-	long		seekpos;
-	int			fd;
-
-	fd = _mdfd_blind_getseg(rnode, blkno);
-
-	if (fd < 0)
-		return SM_FAIL;
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	seekpos = (long) (BLCKSZ * (blkno % ((BlockNumber) RELSEG_SIZE)));
-#ifdef DIAGNOSTIC
-	if (seekpos >= BLCKSZ * RELSEG_SIZE)
-		elog(FATAL, "seekpos too big!");
-#endif
-#else
-	seekpos = (long) (BLCKSZ * (blkno));
-#endif
-
-	errno = 0;
-
-	if (lseek(fd, seekpos, SEEK_SET) != seekpos)
-	{
-		elog(LOG, "mdblindwrt: lseek(%ld) failed: %m", seekpos);
-		close(fd);
-		return SM_FAIL;
-	}
-
-	status = SM_SUCCESS;
-
-	/* write and optionally sync the block */
-	errno = 0;
-	if (write(fd, buffer, BLCKSZ) != BLCKSZ)
-	{
-		/* if write didn't set errno, assume problem is no disk space */
-		if (errno == 0)
-			errno = ENOSPC;
-		elog(LOG, "mdblindwrt: write() failed: %m");
-		status = SM_FAIL;
-	}
-	else if (dofsync && pg_fsync(fd) < 0)
-	{
-		/* the caller asked for durability and we could not provide it */
-		elog(LOG, "mdblindwrt: fsync() failed: %m");
-		status = SM_FAIL;
-	}
-
-	/* close even after a failure, so the descriptor is never leaked */
-	if (close(fd) < 0)
-	{
-		elog(LOG, "mdblindwrt: close() failed: %m");
-		status = SM_FAIL;
-	}
-
-	return status;
-}
-
-/*
- *	mdmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
- *
- *		Finds the segment holding blkno and passes its file to
- *		FileMarkDirty().
- *
- *		Always returns SM_SUCCESS.
- */
-int
-mdmarkdirty(Relation reln, BlockNumber blkno)
-{
-	MdfdVec    *seg = _mdfd_getseg(reln, blkno);
-
-	FileMarkDirty(seg->mdfd_vfd);
-	return SM_SUCCESS;
-}
-
-/*
- *	mdblindmarkdirty() -- Mark the specified block "dirty" (ie, needs fsync).
- *
- *		As with mdblindwrt(), only the name and OID of the database and
- *		relation are available.  Instead of building md/fd datastructures
- *		to postpone the fsync, the segment file is opened, fsync'd at once,
- *		and closed again.
- *
- *		Returns SM_SUCCESS, or SM_FAIL if the open, fsync or close fails.
- */
-int
-mdblindmarkdirty(RelFileNode rnode,
-				 BlockNumber blkno)
-{
-	int			fd = _mdfd_blind_getseg(rnode, blkno);
-	bool		synced;
-	bool		closed;
-
-	if (fd < 0)
-		return SM_FAIL;
-
-	/* always attempt the close, whether or not the fsync worked */
-	synced = (pg_fsync(fd) >= 0);
-	closed = (close(fd) >= 0);
-
-	return (synced && closed) ? SM_SUCCESS : SM_FAIL;
-}
-
-/*
- *	mdnblocks() -- Get the number of blocks stored in a relation.
- *
- *		Important side effect: all segments of the relation are opened
- *		and added to the mdfd_chain list.  If this routine has not been
- *		called, then only segments up to the last one actually touched
- *		are present in the chain...
- *
- *		With segmenting compiled in, the segments are walked in order and
- *		the count is taken from the first one holding fewer than
- *		RELSEG_SIZE blocks.  The relation itself is opened first if it is
- *		not open already.
- *
- *		Returns # of blocks, elog's on error.
- */
-BlockNumber
-mdnblocks(Relation reln)
-{
-	int			fd;
-	MdfdVec    *v;
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	BlockNumber nblocks;
-	BlockNumber segno;
-#endif
-
-	fd = _mdfd_getrelnfd(reln);
-	v = &Md_fdvec[fd];
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	segno = 0;
-	for (;;)
-	{
-		nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ);
-		if (nblocks > ((BlockNumber) RELSEG_SIZE))
-			elog(FATAL, "segment too big in mdnblocks!");
-		if (nblocks < ((BlockNumber) RELSEG_SIZE))
-			return (segno * ((BlockNumber) RELSEG_SIZE)) + nblocks;
-
-		/*
-		 * If segment is exactly RELSEG_SIZE, advance to next one.
-		 */
-		segno++;
-
-		if (v->mdfd_chain == (MdfdVec *) NULL)
-		{
-			/*
-			 * Because we pass O_CREAT, we will create the next segment
-			 * (with zero length) immediately, if the last segment is of
-			 * length RELSEG_SIZE.  This is unnecessary but harmless, and
-			 * testing for the case would take more cycles than it seems
-			 * worth.
-			 */
-			v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
-			if (v->mdfd_chain == (MdfdVec *) NULL)
-				elog(ERROR, "cannot count blocks for %s -- open failed: %m",
-					 RelationGetRelationName(reln));
-		}
-
-		v = v->mdfd_chain;
-	}
-#else
-	return _mdnblocks(v->mdfd_vfd, BLCKSZ);
-#endif
-}
-
-/*
- *	mdtruncate() -- Truncate relation to specified number of blocks.
- *
- *		A request for more blocks than the relation currently has is
- *		rejected; a request for exactly the current size does nothing.
- *		With segmenting compiled in, the segment containing the new end
- *		is shortened and every later segment is truncated to zero length,
- *		unlinked, and dropped from the chain.
- *
- *		Returns # of blocks or InvalidBlockNumber on error.
- */
-BlockNumber
-mdtruncate(Relation reln, BlockNumber nblocks)
-{
-	int			fd;
-	MdfdVec    *v;
-	BlockNumber curnblk;
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	BlockNumber priorblocks;
-#endif
-
-	/*
-	 * NOTE: mdnblocks makes sure we have opened all existing segments, so
-	 * that truncate/delete loop will get them all!
-	 */
-	curnblk = mdnblocks(reln);
-	if (nblocks > curnblk)
-		return InvalidBlockNumber;		/* bogus request */
-	if (nblocks == curnblk)
-		return nblocks;			/* no work */
-
-	fd = _mdfd_getrelnfd(reln);
-	v = &Md_fdvec[fd];
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	priorblocks = 0;
-	while (v != (MdfdVec *) NULL)
-	{
-		MdfdVec    *ov = v;
-
-		if (priorblocks > nblocks)
-		{
-			/*
-			 * This segment is no longer wanted at all (and has already
-			 * been unlinked from the mdfd_chain). We truncate the file
-			 * before deleting it because if other backends are holding
-			 * the file open, the unlink will fail on some platforms.
-			 * Better a zero-size file gets left around than a big file...
-			 */
-			FileTruncate(v->mdfd_vfd, 0);
-			FileUnlink(v->mdfd_vfd);
-			v = v->mdfd_chain;
-			Assert(ov != &Md_fdvec[fd]);		/* we never drop the 1st
-												 * segment */
-			pfree(ov);
-		}
-		else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
-		{
-			/*
-			 * This is the last segment we want to keep. Truncate the file
-			 * to the right length, and clear chain link that points to
-			 * any remaining segments (which we shall zap). NOTE: if
-			 * nblocks is exactly a multiple K of RELSEG_SIZE, we will
-			 * truncate the K+1st segment to 0 length but keep it. This is
-			 * mainly so that the right thing happens if nblocks==0.
-			 */
-			BlockNumber lastsegblocks = nblocks - priorblocks;
-
-			if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
-				return InvalidBlockNumber;
-			v = v->mdfd_chain;
-			ov->mdfd_chain = (MdfdVec *) NULL;
-		}
-		else
-		{
-			/*
-			 * We still need this segment and 0 or more blocks beyond it,
-			 * so nothing to do here.
-			 */
-			v = v->mdfd_chain;
-		}
-		priorblocks += RELSEG_SIZE;
-	}
-#else
-	if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
-		return InvalidBlockNumber;
-#endif
-
-	return nblocks;
-}
-
-/*
- *	mdcommit() -- Commit a transaction.
- *
- *		All changes to magnetic disk relations must be forced to stable
- *		storage.  This routine walks every in-use entry of the private
- *		descriptor table below CurFd and calls FileSync() on its file
- *		and, when segmenting is compiled in, on each chained segment.
- *
- *		Returns SM_SUCCESS, or SM_FAIL as soon as one FileSync() fails
- *		(remaining files are then not visited).
- */
-int
-mdcommit(void)
-{
-	int			i;
-	MdfdVec    *v;
-
-	for (i = 0; i < CurFd; i++)
-	{
-		v = &Md_fdvec[i];
-
-		/* entries sitting on the freelist have no file behind them */
-		if (v->mdfd_flags & MDFD_FREE)
-			continue;
-
-		/* Sync the file entry */
-#ifndef LET_OS_MANAGE_FILESIZE
-		for (; v != (MdfdVec *) NULL; v = v->mdfd_chain)
-#else
-		if (v != (MdfdVec *) NULL)
-#endif
-		{
-			if (FileSync(v->mdfd_vfd) < 0)
-				return SM_FAIL;
-		}
-	}
-
-	return SM_SUCCESS;
-}
-
-/*
- *	mdabort() -- Abort a transaction.
- *
- *		Changes need not be forced to disk at transaction abort, and
- *		this routine itself does no work at all.  Always returns
- *		SM_SUCCESS.
- */
-int
-mdabort(void)
-{
-	/*
-	 * We don't actually have to do anything here.  fd.c will discard
-	 * fsync-needed bits in its AtEOXact_Files() routine.
-	 */
-	return SM_SUCCESS;
-}
-
-/*
- *	mdsync() -- Sync storage.
- *
- *		Calls sync() twice.  When running under the postmaster, the
- *		process sleeps for two seconds between the two calls.
- *		Always returns SM_SUCCESS.
- */
-int
-mdsync(void)
-{
-	sync();
-	if (IsUnderPostmaster)
-		sleep(2);
-	sync();
-	return SM_SUCCESS;
-}
-
-/*
- *	_fdvec_alloc () -- grab a free (or new) md file descriptor vector.
- *
- *		Takes the head of the freelist when there is one.  Otherwise the
- *		Md_fdvec array is doubled in size, the existing entries are copied
- *		over, and the new upper half becomes the freelist (less the entry
- *		being handed out).
- *
- *		Returns the index of the allocated entry, with its flags cleared.
- *		elog(FATAL)s if the table is found in an inconsistent state.
- */
-static int
-_fdvec_alloc(void)
-{
-	MdfdVec    *nvec;
-	int			fdvec,
-				i;
-
-	if (Md_Free >= 0)			/* get from free list */
-	{
-		fdvec = Md_Free;
-		Md_Free = Md_fdvec[fdvec].mdfd_nextFree;
-		Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE);
-		Md_fdvec[fdvec].mdfd_flags = 0;
-
-		/* keep CurFd one past the highest index ever handed out */
-		if (fdvec >= CurFd)
-		{
-			Assert(fdvec == CurFd);
-			CurFd++;
-		}
-		return fdvec;
-	}
-
-	/* Must allocate more room */
-
-	/* an empty freelist is only legitimate once every slot has been used */
-	if (Nfds != CurFd)
-		elog(FATAL, "_fdvec_alloc error");
-
-	Nfds *= 2;
-
-	nvec = (MdfdVec *) MemoryContextAlloc(MdCxt, Nfds * sizeof(MdfdVec));
-	MemSet(nvec, 0, Nfds * sizeof(MdfdVec));
-	memcpy(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec));
-	pfree(Md_fdvec);
-
-	Md_fdvec = nvec;
-
-	/* Set new free list */
-	for (i = CurFd; i < Nfds; i++)
-	{
-		Md_fdvec[i].mdfd_nextFree = i + 1;
-		Md_fdvec[i].mdfd_flags = MDFD_FREE;
-	}
-	Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
-
-	/* slot CurFd is handed out right away, so the list starts after it */
-	Md_Free = CurFd + 1;
-
-	fdvec = CurFd;
-	CurFd++;
-	Md_fdvec[fdvec].mdfd_flags = 0;
-
-	return fdvec;
-}
-
-/*
- *	_fdvec_free () -- free md file descriptor vector.
- *
- *		Marks the entry MDFD_FREE and pushes it onto the front of the
- *		freelist.
- */
-static void
-_fdvec_free(int fdvec)
-{
-	MdfdVec    *slot = &Md_fdvec[fdvec];
-
-	Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE);
-	Assert(slot->mdfd_flags != MDFD_FREE);
-
-	slot->mdfd_flags = MDFD_FREE;
-	slot->mdfd_nextFree = Md_Free;
-	Md_Free = fdvec;
-}
-
-/*
- *	_mdfd_openseg () -- open one segment file of a relation.
- *
- *		Segment 0 is the file named by relpath(); segment N > 0 is that
- *		name with ".N" appended.  oflags is OR'ed into the open flags
- *		(callers in this file pass O_CREAT or 0).
- *
- *		Returns an MdfdVec allocated in MdCxt for the opened file, or
- *		NULL if the open failed.
- */
-static MdfdVec *
-_mdfd_openseg(Relation reln, BlockNumber segno, int oflags)
-{
-	char	   *fullpath = relpath(reln->rd_node);
-	MdfdVec    *seg;
-	int			fd;
-
-	if (segno > 0)
-	{
-		char	   *basepath = fullpath;
-
-		/* leave room for the '.segno' suffix and the terminator */
-		fullpath = (char *) palloc(strlen(basepath) + 12);
-		sprintf(fullpath, "%s.%u", basepath, segno);
-		pfree(basepath);
-	}
-
-	fd = FileNameOpenFile(fullpath, O_RDWR | PG_BINARY | oflags, 0600);
-
-	pfree(fullpath);
-
-	if (fd < 0)
-		return (MdfdVec *) NULL;
-
-	/* build the entry describing the file just opened */
-	seg = (MdfdVec *) MemoryContextAlloc(MdCxt, sizeof(MdfdVec));
-	seg->mdfd_vfd = fd;
-	seg->mdfd_flags = 0;
-#ifndef LET_OS_MANAGE_FILESIZE
-	seg->mdfd_chain = (MdfdVec *) NULL;
-
-#ifdef DIAGNOSTIC
-	if (_mdnblocks(fd, BLCKSZ) > ((BlockNumber) RELSEG_SIZE))
-		elog(FATAL, "segment too big on openseg!");
-#endif
-#endif
-
-	return seg;
-}
-
-/*
- * Return the relation's Md_fdvec index.  If the relation is not open yet,
- * open it with mdopen() and remember the index in reln->rd_fd; elog(ERROR)
- * if that fails.
- */
-
-static int
-_mdfd_getrelnfd(Relation reln)
-{
-	int			fd = RelationGetFile(reln);
-
-	if (fd >= 0)
-		return fd;				/* already open */
-
-	fd = mdopen(reln);
-	if (fd < 0)
-		elog(ERROR, "_mdfd_getrelnfd: cannot open relation %s: %m",
-			 RelationGetRelationName(reln));
-	reln->rd_fd = fd;
-
-	return fd;
-}
-
-/*
- * Find the segment of the relation holding the specified block.
- *
- * The relation is opened first if necessary.  With segmenting compiled in,
- * the mdfd_chain is followed from the first segment to segment number
- * blkno / RELSEG_SIZE, opening any segments not yet in the chain; only
- * the target segment itself may be created.  elog(ERROR)s if a segment
- * cannot be opened.  Without segmenting, the relation's single entry is
- * returned.
- */
-
-static MdfdVec *
-_mdfd_getseg(Relation reln, BlockNumber blkno)
-{
-	MdfdVec    *v;
-	int			fd;
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	BlockNumber segno;
-	BlockNumber i;
-#endif
-
-	fd = _mdfd_getrelnfd(reln);
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	for (v = &Md_fdvec[fd], segno = blkno / ((BlockNumber) RELSEG_SIZE), i = 1;
-		 segno > 0;
-		 i++, segno--)
-	{
-
-		if (v->mdfd_chain == (MdfdVec *) NULL)
-		{
-			/*
-			 * We will create the next segment only if the target block is
-			 * within it (segno counts down, so segno == 1 means the
-			 * segment being opened is the target).  This prevents
-			 * Sorcerer's Apprentice syndrome if a bug at higher levels
-			 * causes us to be handed a ridiculously large blkno ---
-			 * otherwise we could create many thousands of empty segment
-			 * files before reaching the "target" block.  We should never
-			 * need to create more than one new segment per call, so this
-			 * restriction seems reasonable.
-			 */
-			v->mdfd_chain = _mdfd_openseg(reln, i, (segno == 1) ? O_CREAT : 0);
-
-			if (v->mdfd_chain == (MdfdVec *) NULL)
-				elog(ERROR, "cannot open segment %u of relation %s (target block %u): %m",
-					 i, RelationGetRelationName(reln), blkno);
-		}
-		v = v->mdfd_chain;
-	}
-#else
-	v = &Md_fdvec[fd];
-#endif
-
-	return v;
-}
-
-/*
- * Open the segment file holding the specified block, working "blind".
- *
- * This does the job of _mdfd_getseg() without a Relation struct.  On the
- * assumption that the same relation is unlikely to be touched again soon,
- * no FD entry is made for it --- a kernel file descriptor is opened, to be
- * used and promptly closed by the caller.  The target block is assumed to
- * exist already, so the file is never created or extended here.
- *
- * The return value is the kernel descriptor, or -1 on failure (which is
- * also logged at LOG level).
- */
-
-static int
-_mdfd_blind_getseg(RelFileNode rnode, BlockNumber blkno)
-{
-	char	   *path = relpath(rnode);
-	int			fd;
-
-#ifndef LET_OS_MANAGE_FILESIZE
-	BlockNumber segno = blkno / ((BlockNumber) RELSEG_SIZE);
-
-	/* segments past the first are named "<path>.<segno>" */
-	if (segno > 0)
-	{
-		char	   *segpath = (char *) palloc(strlen(path) + 12);
-
-		sprintf(segpath, "%s.%u", path, segno);
-		pfree(path);
-		path = segpath;
-	}
-#endif
-
-	/* go through fd.c so that other FDs can be closed if needed */
-	fd = BasicOpenFile(path, O_RDWR | PG_BINARY, 0600);
-	if (fd < 0)
-		elog(LOG, "_mdfd_blind_getseg: couldn't open %s: %m", path);
-
-	pfree(path);
-
-	return fd;
-}
-
-/*
- * Number of whole blocks of size blcksz in the given file, found by
- * seeking to its end.  A failed seek is reported as an empty file.
- */
-static BlockNumber
-_mdnblocks(File file, Size blcksz)
-{
-	long		endpos = FileSeek(file, 0L, SEEK_END);
-
-	if (endpos >= 0)
-		return (BlockNumber) (endpos / blcksz);
-
-	return 0;					/* on failure, assume file is empty */
-}