summaryrefslogtreecommitdiff
path: root/src/backend/utils/cache/relcache.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/cache/relcache.c')
-rw-r--r--src/backend/utils/cache/relcache.c503
1 files changed, 365 insertions, 138 deletions
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 639593743d0..4ad76cf78e1 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -8,14 +8,15 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.288 2009/07/29 20:56:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.289 2009/08/12 20:53:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* RelationCacheInitialize - initialize relcache (to empty)
- * RelationCacheInitializePhase2 - finish initializing relcache
+ * RelationCacheInitializePhase2 - initialize shared-catalog entries
+ * RelationCacheInitializePhase3 - finish initializing relcache
* RelationIdGetRelation - get a reldesc by relation id
* RelationClose - close an open relation
*
@@ -30,7 +31,6 @@
#include <unistd.h>
#include "access/genam.h"
-#include "access/heapam.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/xact.h"
@@ -43,10 +43,12 @@
#include "catalog/pg_attrdef.h"
#include "catalog/pg_authid.h"
#include "catalog/pg_constraint.h"
+#include "catalog/pg_database.h"
#include "catalog/pg_namespace.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_rewrite.h"
+#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/trigger.h"
#include "miscadmin.h"
@@ -70,20 +72,21 @@
/*
- * name of relcache init file, used to speed up backend startup
+ * name of relcache init file(s), used to speed up backend startup
*/
#define RELCACHE_INIT_FILENAME "pg_internal.init"
-#define RELCACHE_INIT_FILEMAGIC 0x573264 /* version ID value */
+#define RELCACHE_INIT_FILEMAGIC 0x573265 /* version ID value */
/*
* hardcoded tuple descriptors. see include/catalog/pg_attribute.h
*/
-static FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
-static FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
-static FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
-static FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
-static FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
+static const FormData_pg_attribute Desc_pg_class[Natts_pg_class] = {Schema_pg_class};
+static const FormData_pg_attribute Desc_pg_attribute[Natts_pg_attribute] = {Schema_pg_attribute};
+static const FormData_pg_attribute Desc_pg_proc[Natts_pg_proc] = {Schema_pg_proc};
+static const FormData_pg_attribute Desc_pg_type[Natts_pg_type] = {Schema_pg_type};
+static const FormData_pg_attribute Desc_pg_database[Natts_pg_database] = {Schema_pg_database};
+static const FormData_pg_attribute Desc_pg_index[Natts_pg_index] = {Schema_pg_index};
/*
* Hash tables that index the relation cache
@@ -106,6 +109,12 @@ static HTAB *RelationIdCache;
bool criticalRelcachesBuilt = false;
/*
+ * This flag is false until we have prepared the critical relcache entries
+ * for shared catalogs (specifically, pg_database and its indexes).
+ */
+bool criticalSharedRelcachesBuilt = false;
+
+/*
* This counter counts relcache inval events received since backend startup
* (but only for rels that are actually in cache). Presently, we use it only
* to detect whether data about to be written by write_relcache_init_file()
@@ -114,8 +123,10 @@ bool criticalRelcachesBuilt = false;
static long relcacheInvalsReceived = 0L;
/*
- * This list remembers the OIDs of the relations cached in the relcache
- * init file.
+ * This list remembers the OIDs of the non-shared relations cached in the
+ * database's local relcache init file. Note that there is no corresponding
+ * list for the shared relcache init file, for reasons explained in the
+ * comments for RelationCacheInitFileRemove.
*/
static List *initFileRelationIds = NIL;
@@ -188,12 +199,12 @@ static void RelationClearRelation(Relation relation, bool rebuild);
static void RelationReloadIndexInfo(Relation relation);
static void RelationFlushRelation(Relation relation);
-static bool load_relcache_init_file(void);
-static void write_relcache_init_file(void);
+static bool load_relcache_init_file(bool shared);
+static void write_relcache_init_file(bool shared);
static void write_item(const void *data, Size len, FILE *fp);
-static void formrdesc(const char *relationName, Oid relationReltype,
- bool hasoids, int natts, FormData_pg_attribute *att);
+static void formrdesc(const char *relationName, bool isshared,
+ bool hasoids, int natts, const FormData_pg_attribute *attrs);
static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK);
static Relation AllocateRelationDesc(Relation relation, Form_pg_class relp);
@@ -201,6 +212,7 @@ static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
static Relation RelationBuildDesc(Oid targetRelId, Relation oldrelation);
static void RelationInitPhysicalAddr(Relation relation);
+static void load_critical_index(Oid indexoid);
static TupleDesc GetPgClassDescriptor(void);
static TupleDesc GetPgIndexDescriptor(void);
static void AttrDefaultFetch(Relation relation);
@@ -217,6 +229,8 @@ static void IndexSupportInitialize(oidvector *indclass,
static OpClassCacheEnt *LookupOpclassInfo(Oid operatorClassOid,
StrategyNumber numStrats,
StrategyNumber numSupport);
+static void RelationCacheInitFileRemoveInDir(const char *tblspcpath);
+static void unlink_initfile(const char *initfilename);
/*
@@ -240,6 +254,15 @@ ScanPgRelation(Oid targetRelId, bool indexOK)
ScanKeyData key[1];
/*
+ * If something goes wrong during backend startup, we might find ourselves
+ * trying to read pg_class before we've selected a database. That ain't
+ * gonna work, so bail out with a useful error message. If this happens,
+ * it probably means a relcache entry that needs to be nailed isn't.
+ */
+ if (!OidIsValid(MyDatabaseId))
+ elog(FATAL, "cannot read pg_class without having selected a database");
+
+ /*
* form a scan key
*/
ScanKeyInit(&key[0],
@@ -1332,24 +1355,27 @@ LookupOpclassInfo(Oid operatorClassOid,
/*
* formrdesc
*
- * This is a special cut-down version of RelationBuildDesc()
- * used by RelationCacheInitializePhase2() in initializing the relcache.
+ * This is a special cut-down version of RelationBuildDesc(),
+ * used while initializing the relcache.
* The relation descriptor is built just from the supplied parameters,
* without actually looking at any system table entries. We cheat
* quite a lot since we only need to work for a few basic system
* catalogs.
*
- * formrdesc is currently used for: pg_class, pg_attribute, pg_proc,
- * and pg_type (see RelationCacheInitializePhase2).
+ * formrdesc is currently used for: pg_database, pg_class, pg_attribute,
+ * pg_proc, and pg_type (see RelationCacheInitializePhase2/3).
*
* Note that these catalogs can't have constraints (except attnotnull),
* default values, rules, or triggers, since we don't cope with any of that.
+ * (Well, actually, this only matters for properties that need to be valid
+ * during bootstrap or before RelationCacheInitializePhase3 runs, and none of
+ * these properties matter then...)
*
* NOTE: we assume we are already switched into CacheMemoryContext.
*/
static void
-formrdesc(const char *relationName, Oid relationReltype,
- bool hasoids, int natts, FormData_pg_attribute *att)
+formrdesc(const char *relationName, bool isshared,
+ bool hasoids, int natts, const FormData_pg_attribute *attrs)
{
Relation relation;
int i;
@@ -1385,21 +1411,21 @@ formrdesc(const char *relationName, Oid relationReltype,
* initialize relation tuple form
*
* The data we insert here is pretty incomplete/bogus, but it'll serve to
- * get us launched. RelationCacheInitializePhase2() will read the real
+ * get us launched. RelationCacheInitializePhase3() will read the real
* data from pg_class and replace what we've done here.
*/
relation->rd_rel = (Form_pg_class) palloc0(CLASS_TUPLE_SIZE);
namestrcpy(&relation->rd_rel->relname, relationName);
relation->rd_rel->relnamespace = PG_CATALOG_NAMESPACE;
- relation->rd_rel->reltype = relationReltype;
/*
* It's important to distinguish between shared and non-shared relations,
- * even at bootstrap time, to make sure we know where they are stored. At
- * present, all relations that formrdesc is used for are not shared.
+ * even at bootstrap time, to make sure we know where they are stored.
*/
- relation->rd_rel->relisshared = false;
+ relation->rd_rel->relisshared = isshared;
+ if (isshared)
+ relation->rd_rel->reltablespace = GLOBALTABLESPACE_OID;
/*
* Likewise, we must know if a relation is temp ... but formrdesc is not
@@ -1423,9 +1449,6 @@ formrdesc(const char *relationName, Oid relationReltype,
relation->rd_att = CreateTemplateTupleDesc(natts, hasoids);
relation->rd_att->tdrefcount = 1; /* mark as refcounted */
- relation->rd_att->tdtypeid = relationReltype;
- relation->rd_att->tdtypmod = -1; /* unnecessary, but... */
-
/*
* initialize tuple desc info
*/
@@ -1433,9 +1456,9 @@ formrdesc(const char *relationName, Oid relationReltype,
for (i = 0; i < natts; i++)
{
memcpy(relation->rd_att->attrs[i],
- &att[i],
+ &attrs[i],
ATTRIBUTE_FIXED_PART_SIZE);
- has_not_null |= att[i].attnotnull;
+ has_not_null |= attrs[i].attnotnull;
/* make sure attcacheoff is valid */
relation->rd_att->attrs[i]->attcacheoff = -1;
}
@@ -1637,6 +1660,31 @@ RelationReloadIndexInfo(Relation relation)
Assert(relation->rd_smgr == NULL);
/*
+ * Must reset targblock, fsm_nblocks and vm_nblocks in case rel was
+ * truncated
+ */
+ relation->rd_targblock = InvalidBlockNumber;
+ relation->rd_fsm_nblocks = InvalidBlockNumber;
+ relation->rd_vm_nblocks = InvalidBlockNumber;
+ /* Must free any AM cached data, too */
+ if (relation->rd_amcache)
+ pfree(relation->rd_amcache);
+ relation->rd_amcache = NULL;
+
+ /*
+ * If it's a shared index, we might be called before backend startup
+ * has finished selecting a database, in which case we have no way to
+ * read pg_class yet. However, a shared index can never have any
+ * significant schema updates, so it's okay to ignore the invalidation
+ * signal. Just mark it valid and return without doing anything more.
+ */
+ if (relation->rd_rel->relisshared && !criticalRelcachesBuilt)
+ {
+ relation->rd_isvalid = true;
+ return;
+ }
+
+ /*
* Read the pg_class row
*
* Don't try to use an indexscan of pg_class_oid_index to reload the info
@@ -1659,18 +1707,6 @@ RelationReloadIndexInfo(Relation relation)
RelationInitPhysicalAddr(relation);
/*
- * Must reset targblock, fsm_nblocks and vm_nblocks in case rel was
- * truncated
- */
- relation->rd_targblock = InvalidBlockNumber;
- relation->rd_fsm_nblocks = InvalidBlockNumber;
- relation->rd_vm_nblocks = InvalidBlockNumber;
- /* Must free any AM cached data, too */
- if (relation->rd_amcache)
- pfree(relation->rd_amcache);
- relation->rd_amcache = NULL;
-
- /*
* For a non-system index, there are fields of the pg_index row that are
* allowed to change, so re-read that row and update the relcache entry.
* Most of the info derived from pg_index (such as support function lookup
@@ -2304,10 +2340,12 @@ RelationBuildLocalRelation(const char *relname,
/*
* check for creation of a rel that must be nailed in cache.
*
- * XXX this list had better match RelationCacheInitializePhase2's list.
+ * XXX this list had better match the relations specially handled in
+ * RelationCacheInitializePhase2/3.
*/
switch (relid)
{
+ case DatabaseRelationId:
case RelationRelationId:
case AttributeRelationId:
case ProcedureRelationId:
@@ -2489,23 +2527,65 @@ RelationCacheInitialize(void)
/*
* RelationCacheInitializePhase2
*
- * This is called as soon as the catcache and transaction system
- * are functional. At this point we can actually read data from
- * the system catalogs. We first try to read pre-computed relcache
- * entries from the pg_internal.init file. If that's missing or
- * broken, make phony entries for the minimum set of nailed-in-cache
- * relations. Then (unless bootstrapping) make sure we have entries
- * for the critical system indexes. Once we've done all this, we
- * have enough infrastructure to open any system catalog or use any
- * catcache. The last step is to rewrite pg_internal.init if needed.
+ * This is called to prepare for access to pg_database during startup.
+ * We must at least set up a nailed reldesc for pg_database. Ideally
+ * we'd like to have reldescs for its indexes, too. We attempt to
+ * load this information from the shared relcache init file. If that's
+ * missing or broken, just make a phony entry for pg_database.
+ * RelationCacheInitializePhase3 will clean up as needed.
*/
void
RelationCacheInitializePhase2(void)
{
+ MemoryContext oldcxt;
+
+ /*
+ * In bootstrap mode, pg_database isn't there yet anyway, so do nothing.
+ */
+ if (IsBootstrapProcessingMode())
+ return;
+
+ /*
+ * switch to cache memory context
+ */
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+
+ /*
+ * Try to load the shared relcache cache file. If unsuccessful,
+ * bootstrap the cache with a pre-made descriptor for pg_database.
+ */
+ if (!load_relcache_init_file(true))
+ {
+ formrdesc("pg_database", true,
+ true, Natts_pg_database, Desc_pg_database);
+
+#define NUM_CRITICAL_SHARED_RELS 1 /* fix if you change list above */
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * RelationCacheInitializePhase3
+ *
+ * This is called as soon as the catcache and transaction system
+ * are functional and we have determined MyDatabaseId. At this point
+ * we can actually read data from the database's system catalogs.
+ * We first try to read pre-computed relcache entries from the local
+ * relcache init file. If that's missing or broken, make phony entries
+ * for the minimum set of nailed-in-cache relations. Then (unless
+ * bootstrapping) make sure we have entries for the critical system
+ * indexes. Once we've done all this, we have enough infrastructure to
+ * open any system catalog or use any catcache. The last step is to
+ * rewrite the cache files if needed.
+ */
+void
+RelationCacheInitializePhase3(void)
+{
HASH_SEQ_STATUS status;
RelIdCacheEnt *idhentry;
MemoryContext oldcxt;
- bool needNewCacheFile = false;
+ bool needNewCacheFile = !criticalSharedRelcachesBuilt;
/*
* switch to cache memory context
@@ -2513,25 +2593,25 @@ RelationCacheInitializePhase2(void)
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
/*
- * Try to load the relcache cache file. If unsuccessful, bootstrap the
- * cache with pre-made descriptors for the critical "nailed-in" system
- * catalogs.
+ * Try to load the local relcache cache file. If unsuccessful,
+ * bootstrap the cache with pre-made descriptors for the critical
+ * "nailed-in" system catalogs.
*/
if (IsBootstrapProcessingMode() ||
- !load_relcache_init_file())
+ !load_relcache_init_file(false))
{
needNewCacheFile = true;
- formrdesc("pg_class", PG_CLASS_RELTYPE_OID,
+ formrdesc("pg_class", false,
true, Natts_pg_class, Desc_pg_class);
- formrdesc("pg_attribute", PG_ATTRIBUTE_RELTYPE_OID,
+ formrdesc("pg_attribute", false,
false, Natts_pg_attribute, Desc_pg_attribute);
- formrdesc("pg_proc", PG_PROC_RELTYPE_OID,
+ formrdesc("pg_proc", false,
true, Natts_pg_proc, Desc_pg_proc);
- formrdesc("pg_type", PG_TYPE_RELTYPE_OID,
+ formrdesc("pg_type", false,
true, Natts_pg_type, Desc_pg_type);
-#define NUM_CRITICAL_RELS 4 /* fix if you change list above */
+#define NUM_CRITICAL_LOCAL_RELS 4 /* fix if you change list above */
}
MemoryContextSwitchTo(oldcxt);
@@ -2567,36 +2647,40 @@ RelationCacheInitializePhase2(void)
*/
if (!criticalRelcachesBuilt)
{
- Relation ird;
-
-#define LOAD_CRIT_INDEX(indexoid) \
- do { \
- LockRelationOid(indexoid, AccessShareLock); \
- ird = RelationBuildDesc(indexoid, NULL); \
- if (ird == NULL) \
- elog(PANIC, "could not open critical system index %u", \
- indexoid); \
- ird->rd_isnailed = true; \
- ird->rd_refcnt = 1; \
- UnlockRelationOid(indexoid, AccessShareLock); \
- } while (0)
-
- LOAD_CRIT_INDEX(ClassOidIndexId);
- LOAD_CRIT_INDEX(AttributeRelidNumIndexId);
- LOAD_CRIT_INDEX(IndexRelidIndexId);
- LOAD_CRIT_INDEX(OpclassOidIndexId);
- LOAD_CRIT_INDEX(AccessMethodStrategyIndexId);
- LOAD_CRIT_INDEX(AccessMethodProcedureIndexId);
- LOAD_CRIT_INDEX(OperatorOidIndexId);
- LOAD_CRIT_INDEX(RewriteRelRulenameIndexId);
- LOAD_CRIT_INDEX(TriggerRelidNameIndexId);
-
-#define NUM_CRITICAL_INDEXES 9 /* fix if you change list above */
+ load_critical_index(ClassOidIndexId);
+ load_critical_index(AttributeRelidNumIndexId);
+ load_critical_index(IndexRelidIndexId);
+ load_critical_index(OpclassOidIndexId);
+ load_critical_index(AccessMethodStrategyIndexId);
+ load_critical_index(AccessMethodProcedureIndexId);
+ load_critical_index(OperatorOidIndexId);
+ load_critical_index(RewriteRelRulenameIndexId);
+ load_critical_index(TriggerRelidNameIndexId);
+
+#define NUM_CRITICAL_LOCAL_INDEXES 9 /* fix if you change list above */
criticalRelcachesBuilt = true;
}
/*
+ * Process critical shared indexes too.
+ *
+ * DatabaseNameIndexId isn't critical for relcache loading, but rather
+ * for initial lookup of MyDatabaseId, without which we'll never find
+ * any non-shared catalogs at all. Autovacuum calls InitPostgres with
+ * a database OID, so it instead depends on DatabaseOidIndexId.
+ */
+ if (!criticalSharedRelcachesBuilt)
+ {
+ load_critical_index(DatabaseNameIndexId);
+ load_critical_index(DatabaseOidIndexId);
+
+#define NUM_CRITICAL_SHARED_INDEXES 2 /* fix if you change list above */
+
+ criticalSharedRelcachesBuilt = true;
+ }
+
+ /*
* Now, scan all the relcache entries and update anything that might be
* wrong in the results from formrdesc or the relcache cache file. If we
* faked up relcache entries using formrdesc, then read the real pg_class
@@ -2658,7 +2742,8 @@ RelationCacheInitializePhase2(void)
}
/*
- * Lastly, write out a new relcache cache file if one is needed.
+ * Lastly, write out new relcache cache files if needed. We don't bother
+ * to distinguish cases where only one of the two needs an update.
*/
if (needNewCacheFile)
{
@@ -2666,16 +2751,37 @@ RelationCacheInitializePhase2(void)
* Force all the catcaches to finish initializing and thereby open the
* catalogs and indexes they use. This will preload the relcache with
* entries for all the most important system catalogs and indexes, so
- * that the init file will be most useful for future backends.
+ * that the init files will be most useful for future backends.
*/
InitCatalogCachePhase2();
- /* now write the file */
- write_relcache_init_file();
+ /* reset initFileRelationIds list; we'll fill it during write */
+ initFileRelationIds = NIL;
+
+ /* now write the files */
+ write_relcache_init_file(true);
+ write_relcache_init_file(false);
}
}
/*
+ * Load one critical system index into the relcache
+ */
+static void
+load_critical_index(Oid indexoid)
+{
+ Relation ird;
+
+ LockRelationOid(indexoid, AccessShareLock);
+ ird = RelationBuildDesc(indexoid, NULL);
+ if (ird == NULL)
+ elog(PANIC, "could not open critical system index %u", indexoid);
+ ird->rd_isnailed = true;
+ ird->rd_refcnt = 1;
+ UnlockRelationOid(indexoid, AccessShareLock);
+}
+
+/*
* GetPgClassDescriptor -- get a predefined tuple descriptor for pg_class
* GetPgIndexDescriptor -- get a predefined tuple descriptor for pg_index
*
@@ -2688,7 +2794,8 @@ RelationCacheInitializePhase2(void)
* extracting fields.
*/
static TupleDesc
-BuildHardcodedDescriptor(int natts, Form_pg_attribute attrs, bool hasoids)
+BuildHardcodedDescriptor(int natts, const FormData_pg_attribute *attrs,
+ bool hasoids)
{
TupleDesc result;
MemoryContext oldcxt;
@@ -2745,6 +2852,9 @@ GetPgIndexDescriptor(void)
return pgindexdesc;
}
+/*
+ * Load any default attribute value definitions for the relation.
+ */
static void
AttrDefaultFetch(Relation relation)
{
@@ -2810,6 +2920,9 @@ AttrDefaultFetch(Relation relation)
ndef - found, RelationGetRelationName(relation));
}
+/*
+ * Load any check constraints for the relation.
+ */
static void
CheckConstraintFetch(Relation relation)
{
@@ -3310,7 +3423,10 @@ RelationGetIndexAttrBitmap(Relation relation)
* relation descriptors using sequential scans and write 'em to
* the initialization file for use by subsequent backends.
*
- * We could dispense with the initialization file and just build the
+ * As of Postgres 8.5, there is one local initialization file in each
+ * database, plus one shared initialization file for shared catalogs.
+ *
+ * We could dispense with the initialization files and just build the
* critical reldescs the hard way on every backend startup, but that
* slows down backend startup noticeably.
*
@@ -3318,24 +3434,26 @@ RelationGetIndexAttrBitmap(Relation relation)
* just the ones that are absolutely critical; this allows us to speed
* up backend startup by not having to build such entries the hard way.
* Presently, all the catalog and index entries that are referred to
- * by catcaches are stored in the initialization file.
+ * by catcaches are stored in the initialization files.
*
* The same mechanism that detects when catcache and relcache entries
* need to be invalidated (due to catalog updates) also arranges to
- * unlink the initialization file when its contents may be out of date.
- * The file will then be rebuilt during the next backend startup.
+ * unlink the initialization files when the contents may be out of date.
+ * The files will then be rebuilt during the next backend startup.
*/
/*
- * load_relcache_init_file -- attempt to load cache from the init file
+ * load_relcache_init_file -- attempt to load cache from the shared
+ * or local cache init file
*
- * If successful, return TRUE and set criticalRelcachesBuilt to true.
+ * If successful, return TRUE and set criticalRelcachesBuilt or
+ * criticalSharedRelcachesBuilt to true.
* If not successful, return FALSE.
*
* NOTE: we assume we are already switched into CacheMemoryContext.
*/
static bool
-load_relcache_init_file(void)
+load_relcache_init_file(bool shared)
{
FILE *fp;
char initfilename[MAXPGPATH];
@@ -3348,8 +3466,12 @@ load_relcache_init_file(void)
magic;
int i;
- snprintf(initfilename, sizeof(initfilename), "%s/%s",
- DatabasePath, RELCACHE_INIT_FILENAME);
+ if (shared)
+ snprintf(initfilename, sizeof(initfilename), "global/%s",
+ RELCACHE_INIT_FILENAME);
+ else
+ snprintf(initfilename, sizeof(initfilename), "%s/%s",
+ DatabasePath, RELCACHE_INIT_FILENAME);
fp = AllocateFile(initfilename, PG_BINARY_R);
if (fp == NULL)
@@ -3364,7 +3486,6 @@ load_relcache_init_file(void)
rels = (Relation *) palloc(max_rels * sizeof(Relation));
num_rels = 0;
nailed_rels = nailed_indexes = 0;
- initFileRelationIds = NIL;
/* check for correct magic number (compatible version) */
if (fread(&magic, 1, sizeof(magic), fp) != sizeof(magic))
@@ -3588,7 +3709,7 @@ load_relcache_init_file(void)
/*
* Rules and triggers are not saved (mainly because the internal
* format is complex and subject to change). They must be rebuilt if
- * needed by RelationCacheInitializePhase2. This is not expected to
+ * needed by RelationCacheInitializePhase3. This is not expected to
* be a big performance hit since few system catalogs have such. Ditto
* for index expressions and predicates.
*/
@@ -3632,9 +3753,18 @@ load_relcache_init_file(void)
* get the right number of nailed items? (This is a useful crosscheck in
* case the set of critical rels or indexes changes.)
*/
- if (nailed_rels != NUM_CRITICAL_RELS ||
- nailed_indexes != NUM_CRITICAL_INDEXES)
- goto read_failed;
+ if (shared)
+ {
+ if (nailed_rels != NUM_CRITICAL_SHARED_RELS ||
+ nailed_indexes != NUM_CRITICAL_SHARED_INDEXES)
+ goto read_failed;
+ }
+ else
+ {
+ if (nailed_rels != NUM_CRITICAL_LOCAL_RELS ||
+ nailed_indexes != NUM_CRITICAL_LOCAL_INDEXES)
+ goto read_failed;
+ }
/*
* OK, all appears well.
@@ -3645,14 +3775,18 @@ load_relcache_init_file(void)
{
RelationCacheInsert(rels[relno]);
/* also make a list of their OIDs, for RelationIdIsInInitFile */
- initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
- initFileRelationIds);
+ if (!shared)
+ initFileRelationIds = lcons_oid(RelationGetRelid(rels[relno]),
+ initFileRelationIds);
}
pfree(rels);
FreeFile(fp);
- criticalRelcachesBuilt = true;
+ if (shared)
+ criticalSharedRelcachesBuilt = true;
+ else
+ criticalRelcachesBuilt = true;
return true;
/*
@@ -3669,10 +3803,10 @@ read_failed:
/*
* Write out a new initialization file with the current contents
- * of the relcache.
+ * of the relcache (either shared rels or local rels, as indicated).
*/
static void
-write_relcache_init_file(void)
+write_relcache_init_file(bool shared)
{
FILE *fp;
char tempfilename[MAXPGPATH];
@@ -3688,10 +3822,20 @@ write_relcache_init_file(void)
* another backend starting at about the same time might crash trying to
* read the partially-complete file.
*/
- snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
- DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
- snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
- DatabasePath, RELCACHE_INIT_FILENAME);
+ if (shared)
+ {
+ snprintf(tempfilename, sizeof(tempfilename), "global/%s.%d",
+ RELCACHE_INIT_FILENAME, MyProcPid);
+ snprintf(finalfilename, sizeof(finalfilename), "global/%s",
+ RELCACHE_INIT_FILENAME);
+ }
+ else
+ {
+ snprintf(tempfilename, sizeof(tempfilename), "%s/%s.%d",
+ DatabasePath, RELCACHE_INIT_FILENAME, MyProcPid);
+ snprintf(finalfilename, sizeof(finalfilename), "%s/%s",
+ DatabasePath, RELCACHE_INIT_FILENAME);
+ }
unlink(tempfilename); /* in case it exists w/wrong permissions */
@@ -3719,17 +3863,19 @@ write_relcache_init_file(void)
elog(FATAL, "could not write init file");
/*
- * Write all the reldescs (in no particular order).
+ * Write all the appropriate reldescs (in no particular order).
*/
hash_seq_init(&status, RelationIdCache);
- initFileRelationIds = NIL;
-
while ((idhentry = (RelIdCacheEnt *) hash_seq_search(&status)) != NULL)
{
Relation rel = idhentry->reldesc;
Form_pg_class relform = rel->rd_rel;
+ /* ignore if not correct group */
+ if (relform->relisshared != shared)
+ continue;
+
/* first write the relcache entry proper */
write_item(rel, sizeof(RelationData), fp);
@@ -3788,10 +3934,13 @@ write_relcache_init_file(void)
}
/* also make a list of their OIDs, for RelationIdIsInInitFile */
- oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
- initFileRelationIds = lcons_oid(RelationGetRelid(rel),
- initFileRelationIds);
- MemoryContextSwitchTo(oldcxt);
+ if (!shared)
+ {
+ oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+ initFileRelationIds = lcons_oid(RelationGetRelid(rel),
+ initFileRelationIds);
+ MemoryContextSwitchTo(oldcxt);
+ }
}
if (FreeFile(fp))
@@ -3852,7 +4001,7 @@ write_item(const void *data, Size len, FILE *fp)
/*
* Detect whether a given relation (identified by OID) is one of the ones
- * we store in the init file.
+ * we store in the local relcache init file.
*
* Note that we effectively assume that all backends running in a database
* would choose to store the same set of relations in the init file;
@@ -3868,7 +4017,7 @@ RelationIdIsInInitFile(Oid relationId)
/*
* Invalidate (remove) the init file during commit of a transaction that
* changed one or more of the relation cache entries that are kept in the
- * init file.
+ * local init file.
*
* We actually need to remove the init file twice: once just before sending
* the SI messages that include relcache inval for such relations, and once
@@ -3883,6 +4032,13 @@ RelationIdIsInInitFile(Oid relationId)
*
* Ignore any failure to unlink the file, since it might not be there if
* no backend has been started since the last removal.
+ *
+ * Notice this deals only with the local init file, not the shared init file.
+ * The reason is that there can never be a "significant" change to the
+ * relcache entry of a shared relation; the most that could happen is
+ * updates of noncritical fields such as relpages/reltuples. So, while
+ * it's worth updating the shared init file from time to time, it can never
+ * be invalid enough to make it necessary to remove it.
*/
void
RelationCacheInitFileInvalidate(bool beforeSend)
@@ -3914,23 +4070,94 @@ RelationCacheInitFileInvalidate(bool beforeSend)
}
/*
- * Remove the init file for a given database during postmaster startup.
+ * Remove the init files during postmaster startup.
*
- * We used to keep the init file across restarts, but that is unsafe in PITR
+ * We used to keep the init files across restarts, but that is unsafe in PITR
* scenarios, and even in simple crash-recovery cases there are windows for
- * the init file to become out-of-sync with the database. So now we just
- * remove it during startup and expect the first backend launch to rebuild it.
- * Of course, this has to happen in each database of the cluster. For
- * simplicity this is driven by flatfiles.c, which has to scan pg_database
- * anyway.
+ * the init files to become out-of-sync with the database. So now we just
+ * remove them during startup and expect the first backend launch to rebuild
+ * them. Of course, this has to happen in each database of the cluster.
*/
void
-RelationCacheInitFileRemove(const char *dbPath)
+RelationCacheInitFileRemove(void)
+{
+ const char *tblspcdir = "pg_tblspc";
+ DIR *dir;
+ struct dirent *de;
+ char path[MAXPGPATH];
+
+ /*
+ * We zap the shared cache file too. In theory it can't get out of sync
+ * enough to be a problem, but in data-corruption cases, who knows ...
+ */
+ snprintf(path, sizeof(path), "global/%s",
+ RELCACHE_INIT_FILENAME);
+ unlink_initfile(path);
+
+ /* Scan everything in the default tablespace */
+ RelationCacheInitFileRemoveInDir("base");
+
+ /* Scan the tablespace link directory to find non-default tablespaces */
+ dir = AllocateDir(tblspcdir);
+ if (dir == NULL)
+ {
+ elog(LOG, "could not open tablespace link directory \"%s\": %m",
+ tblspcdir);
+ return;
+ }
+
+ while ((de = ReadDir(dir, tblspcdir)) != NULL)
+ {
+ if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
+ {
+ /* Scan the tablespace dir for per-database dirs */
+ snprintf(path, sizeof(path), "%s/%s",
+ tblspcdir, de->d_name);
+ RelationCacheInitFileRemoveInDir(path);
+ }
+ }
+
+ FreeDir(dir);
+}
+
+/* Process one per-tablespace directory for RelationCacheInitFileRemove */
+static void
+RelationCacheInitFileRemoveInDir(const char *tblspcpath)
{
+ DIR *dir;
+ struct dirent *de;
char initfilename[MAXPGPATH];
- snprintf(initfilename, sizeof(initfilename), "%s/%s",
- dbPath, RELCACHE_INIT_FILENAME);
- unlink(initfilename);
- /* ignore any error, since it might not be there at all */
+ /* Scan the tablespace directory to find per-database directories */
+ dir = AllocateDir(tblspcpath);
+ if (dir == NULL)
+ {
+ elog(LOG, "could not open tablespace directory \"%s\": %m",
+ tblspcpath);
+ return;
+ }
+
+ while ((de = ReadDir(dir, tblspcpath)) != NULL)
+ {
+ if (strspn(de->d_name, "0123456789") == strlen(de->d_name))
+ {
+ /* Try to remove the init file in each database */
+ snprintf(initfilename, sizeof(initfilename), "%s/%s/%s",
+ tblspcpath, de->d_name, RELCACHE_INIT_FILENAME);
+ unlink_initfile(initfilename);
+ }
+ }
+
+ FreeDir(dir);
+}
+
+static void
+unlink_initfile(const char *initfilename)
+{
+ if (unlink(initfilename) < 0)
+ {
+ /* It might not be there, but log any error other than ENOENT */
+ if (errno != ENOENT)
+ elog(LOG, "could not remove cache file \"%s\": %m", initfilename);
+ }
}