diff options
author | Noah Misch <noah@leadboat.com> | 2024-09-24 15:25:18 -0700 |
---|---|---|
committer | Noah Misch <noah@leadboat.com> | 2024-09-24 15:25:18 -0700 |
commit | a07e03fd8fa7daf4d1356f7cb501ffe784ea6257 (patch) | |
tree | fa7a55ec1c78beffe2a209fd626697c4d2b24ff3 /src/backend/commands/vacuum.c | |
parent | dbf3f974ee04d24547690268b1fc2b7eb12b4ebc (diff) |
Fix data loss at inplace update after heap_update().
As previously-added tests demonstrated, heap_inplace_update() could
instead update an unrelated tuple of the same catalog. It could lose
the update. Losing relhasindex=t was a source of index corruption.
Inplace-updating commands like VACUUM will now wait for heap_update()
commands like GRANT TABLE and GRANT DATABASE. That isn't ideal, but a
long-running GRANT already hurts VACUUM progress more just by keeping an
XID running. The VACUUM will behave like a DELETE or UPDATE waiting for
the uncommitted change.
For implementation details, start at the systable_inplace_update_begin()
header comment and README.tuplock. Back-patch to v12 (all supported
versions). In back branches, retain a deprecated heap_inplace_update(),
for extensions.
Reported by Smolkin Grigory. Reviewed by Nitin Motiani, (in earlier
versions) Heikki Linnakangas, and (in earlier versions) Alexander
Lakhin.
Discussion: https://postgr.es/m/CAMp+ueZQz3yDk7qg42hk6-9gxniYbp-=bG2mgqecErqR5gGGOA@mail.gmail.com
Diffstat (limited to 'src/backend/commands/vacuum.c')
-rw-r--r-- | src/backend/commands/vacuum.c | 32 |
1 files changed, 21 insertions, 11 deletions
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 23aabdc90dc..ac8f5d9c259 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1417,7 +1417,9 @@ vac_update_relstats(Relation relation, { Oid relid = RelationGetRelid(relation); Relation rd; + ScanKeyData key[1]; HeapTuple ctup; + void *inplace_state; Form_pg_class pgcform; bool dirty, futurexid, @@ -1428,7 +1430,12 @@ vac_update_relstats(Relation relation, rd = table_open(RelationRelationId, RowExclusiveLock); /* Fetch a copy of the tuple to scribble on */ - ctup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid)); + ScanKeyInit(&key[0], + Anum_pg_class_oid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(relid)); + systable_inplace_update_begin(rd, ClassOidIndexId, true, + NULL, 1, key, &ctup, &inplace_state); if (!HeapTupleIsValid(ctup)) elog(ERROR, "pg_class entry for relid %u vanished during vacuuming", relid); @@ -1536,7 +1543,9 @@ vac_update_relstats(Relation relation, /* If anything changed, write out the tuple. */ if (dirty) - heap_inplace_update(rd, ctup); + systable_inplace_update_finish(inplace_state, ctup); + else + systable_inplace_update_cancel(inplace_state); table_close(rd, RowExclusiveLock); @@ -1588,6 +1597,7 @@ vac_update_datfrozenxid(void) bool bogus = false; bool dirty = false; ScanKeyData key[1]; + void *inplace_state; /* * Restrict this task to one backend per database. This avoids race @@ -1711,20 +1721,18 @@ vac_update_datfrozenxid(void) relation = table_open(DatabaseRelationId, RowExclusiveLock); /* - * Get the pg_database tuple to scribble on. Note that this does not - * directly rely on the syscache to avoid issues with flattened toast - * values for the in-place update. + * Fetch a copy of the tuple to scribble on. We could check the syscache + * tuple first. If that concluded !dirty, we'd avoid waiting on + * concurrent heap_update() and would avoid exclusive-locking the buffer. + * For now, don't optimize that. */ ScanKeyInit(&key[0], Anum_pg_database_oid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(MyDatabaseId)); - scan = systable_beginscan(relation, DatabaseOidIndexId, true, - NULL, 1, key); - tuple = systable_getnext(scan); - tuple = heap_copytuple(tuple); - systable_endscan(scan); + systable_inplace_update_begin(relation, DatabaseOidIndexId, true, + NULL, 1, key, &tuple, &inplace_state); if (!HeapTupleIsValid(tuple)) elog(ERROR, "could not find tuple for database %u", MyDatabaseId); @@ -1758,7 +1766,9 @@ vac_update_datfrozenxid(void) newMinMulti = dbform->datminmxid; if (dirty) - heap_inplace_update(relation, tuple); + systable_inplace_update_finish(inplace_state, tuple); + else + systable_inplace_update_cancel(inplace_state); heap_freetuple(tuple); table_close(relation, RowExclusiveLock); |