diff options
Diffstat (limited to 'src/backend')
| -rw-r--r-- | src/backend/commands/tablecmds.c | 11 |
| -rw-r--r-- | src/backend/storage/smgr/md.c | 27 |
| -rw-r--r-- | src/backend/utils/cache/relcache.c | 66 |
3 files changed, 93 insertions, 11 deletions
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 7fbee0c1f71..e7aef2f6b08 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -40,6 +40,7 @@ #include "catalog/pg_depend.h" #include "catalog/pg_foreign_table.h" #include "catalog/pg_inherits.h" +#include "catalog/pg_largeobject.h" #include "catalog/pg_namespace.h" #include "catalog/pg_opclass.h" #include "catalog/pg_statistic_ext.h" @@ -2185,7 +2186,15 @@ truncate_check_rel(Oid relid, Form_pg_class reltuple) (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", relname))); - if (!allowSystemTableMods && IsSystemClass(relid, reltuple)) + /* + * Most system catalogs can't be truncated at all, or at least not unless + * allow_system_table_mods=on. As an exception, however, we allow + * pg_largeobject to be truncated as part of pg_upgrade, because we need + * to change its relfilenode to match the old cluster, and allowing a + * TRUNCATE command to be executed is the easiest way of doing that. 
+ */ + if (!allowSystemTableMods && IsSystemClass(relid, reltuple) + && (!IsBinaryUpgrade || relid != LargeObjectRelationId)) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied: \"%s\" is a system catalog", diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 3998296a62f..3deac496eed 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -319,6 +319,7 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo) { char *path; int ret; + BlockNumber segno = 0; path = relpath(rlocator, forkNum); @@ -353,8 +354,22 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo) /* Prevent other backends' fds from holding on to the disk space */ ret = do_truncate(path); - /* Register request to unlink first segment later */ - register_unlink_segment(rlocator, forkNum, 0 /* first seg */ ); + /* + * Except during a binary upgrade, register request to unlink first + * segment later, rather than now. + * + * If we're performing a binary upgrade, the dangers described in the + * header comments for mdunlink() do not exist, since after a crash + * or even a simple ERROR, the upgrade fails and the whole new cluster + * must be recreated from scratch. And, on the other hand, it is + * important to remove the files from disk immediately, because we + * may be about to reuse the same relfilenumber. + */ + if (!IsBinaryUpgrade) + { + register_unlink_segment(rlocator, forkNum, 0 /* first seg */ ); + ++segno; + } } /* @@ -363,15 +378,17 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forkNum, bool isRedo) if (ret >= 0) { char *segpath = (char *) palloc(strlen(path) + 12); - BlockNumber segno; /* * Note that because we loop until getting ENOENT, we will correctly * remove all inactive segments as well as active ones. 
*/ - for (segno = 1;; segno++) + for (;; segno++) { - sprintf(segpath, "%s.%u", path, segno); + if (segno == 0) + strcpy(segpath, path); + else + sprintf(segpath, "%s.%u", path, segno); if (!RelFileLocatorBackendIsTemp(rlocator)) { diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index bdb771d278f..00dc0f24037 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -41,6 +41,7 @@ #include "access/tupdesc_details.h" #include "access/xact.h" #include "access/xlog.h" +#include "catalog/binary_upgrade.h" #include "catalog/catalog.h" #include "catalog/indexing.h" #include "catalog/namespace.h" @@ -3707,9 +3708,36 @@ RelationSetNewRelfilenumber(Relation relation, char persistence) TransactionId freezeXid = InvalidTransactionId; RelFileLocator newrlocator; - /* Allocate a new relfilenumber */ - newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace, - NULL, persistence); + if (!IsBinaryUpgrade) + { + /* Allocate a new relfilenumber */ + newrelfilenumber = GetNewRelFileNumber(relation->rd_rel->reltablespace, + NULL, persistence); + } + else if (relation->rd_rel->relkind == RELKIND_INDEX) + { + if (!OidIsValid(binary_upgrade_next_index_pg_class_relfilenumber)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("index relfilenumber value not set when in binary upgrade mode"))); + + newrelfilenumber = binary_upgrade_next_index_pg_class_relfilenumber; + binary_upgrade_next_index_pg_class_relfilenumber = InvalidOid; + } + else if (relation->rd_rel->relkind == RELKIND_RELATION) + { + if (!OidIsValid(binary_upgrade_next_heap_pg_class_relfilenumber)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("heap relfilenumber value not set when in binary upgrade mode"))); + + newrelfilenumber = binary_upgrade_next_heap_pg_class_relfilenumber; + binary_upgrade_next_heap_pg_class_relfilenumber = InvalidOid; + } + else + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unexpected request for new relfilenumber in binary upgrade mode"))); /* * Get a writable copy of the pg_class tuple for the given relation. @@ -3724,9 +3752,37 @@ RelationSetNewRelfilenumber(Relation relation, char persistence) classform = (Form_pg_class) GETSTRUCT(tuple); /* - * Schedule unlinking of the old storage at transaction commit. + * Schedule unlinking of the old storage at transaction commit, except + * when performing a binary upgrade, when we must do it immediately. */ - RelationDropStorage(relation); + if (IsBinaryUpgrade) + { + SMgrRelation srel; + + /* + * During a binary upgrade, we use this code path to ensure that + * pg_largeobject and its index have the same relfilenumbers as in + * the old cluster. This is necessary because pg_upgrade treats + * pg_largeobject like a user table, not a system table. It is however + * possible that a table or index may need to end up with the same + * relfilenumber in the new cluster as what it had in the old cluster. + * Hence, we can't wait until commit time to remove the old storage. + * + * In general, this function needs to have transactional semantics, + * and removing the old storage before commit time surely isn't. + * However, it doesn't really matter, because if a binary upgrade + * fails at this stage, the new cluster will need to be recreated + * anyway. + */ + srel = smgropen(relation->rd_locator, relation->rd_backend); + smgrdounlinkall(&srel, 1, false); + smgrclose(srel); + } + else + { + /* Not a binary upgrade, so just schedule it to happen later. */ + RelationDropStorage(relation); + } /* * Create storage for the main fork of the new relfilenumber. If it's a |
