diff options
| author | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2013-01-23 12:04:59 -0300 |
|---|---|---|
| committer | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2013-01-23 12:04:59 -0300 |
| commit | 0ac5ad5134f2769ccbaefec73844f8504c4d6182 (patch) | |
| tree | d9b0ba4a1b65a52030820efe68a9c937c46aad1f /src/include/catalog | |
| parent | f925c79b9f36c54b67053ade5ad225a75b8dc803 (diff) | |
Improve concurrency of foreign key locking
This patch introduces two additional lock modes for tuples: "SELECT FOR
KEY SHARE" and "SELECT FOR NO KEY UPDATE". These don't block each
other, in contrast with already existing "SELECT FOR SHARE" and "SELECT
FOR UPDATE". UPDATE commands that do not modify the values stored in
the columns that are part of the key of the tuple now grab a SELECT FOR
NO KEY UPDATE lock on the tuple, allowing them to proceed concurrently
with tuple locks of the FOR KEY SHARE variety.
Foreign key triggers now use FOR KEY SHARE instead of FOR SHARE; this
means the concurrency improvement applies to them, which is the whole
point of this patch.
The added tuple lock semantics require some rejiggering of the multixact
module, so that the locking level that each transaction is holding can
be stored alongside its Xid. Also, multixacts now need to persist
across server restarts and crashes, because they can now represent not
only tuple locks, but also tuple updates. This means we need more
careful tracking of lifetime of pg_multixact SLRU files; since they now
persist longer, we require more infrastructure to figure out when they
can be removed. pg_upgrade also needs to be careful to copy
pg_multixact files over from the old server to the new, or at least part
of multixact.c state, depending on the versions of the old and new
servers.
Tuple time qualification rules (HeapTupleSatisfies routines) need to be
careful not to consider tuples with the "is multi" infomask bit set as
being only locked; they might need to look up MultiXact values (i.e.
possibly do pg_multixact I/O) to find out the Xid that updated a tuple,
whereas they previously were assured to only use information readily
available from the tuple header. This is considered acceptable, because
the extra I/O would involve cases that would previously cause some
commands to block waiting for concurrent transactions to finish.
Another important change is the fact that locking tuples that have
previously been updated causes the future versions to be marked as
locked, too; this is essential for correctness of foreign key checks.
This causes additional WAL-logging, also (there was previously a single
WAL record for a locked tuple; now there are as many as updated copies
of the tuple there exist.)
With all this in place, contention related to tuples being checked by
foreign key rules should be much reduced.
As a bonus, the old behavior that a subtransaction grabbing a stronger
tuple lock than the parent (sub)transaction held on a given tuple and
later aborting caused the weaker lock to be lost, has been fixed.
Many new spec files were added for isolation tester framework, to ensure
overall behavior is sane. There's probably room for several more tests.
There were several reviewers of this patch; in particular, Noah Misch
and Andres Freund spent considerable time in it. Original idea for the
patch came from Simon Riggs, after a problem report by Joel Jacobson.
Most code is from me, with contributions from Marti Raudsepp, Alexander
Shulgin, Noah Misch and Andres Freund.
This patch was discussed in several pgsql-hackers threads; the most
important start at the following message-ids:
AANLkTimo9XVcEzfiBR-ut3KVNDkjm2Vxh+t8kAmWjPuv@mail.gmail.com
1290721684-sup-3951@alvh.no-ip.org
1294953201-sup-2099@alvh.no-ip.org
1320343602-sup-2290@alvh.no-ip.org
1339690386-sup-8927@alvh.no-ip.org
4FE5FF020200002500048A3D@gw.wicourts.gov
4FEAB90A0200002500048B7D@gw.wicourts.gov
Diffstat (limited to 'src/include/catalog')
| -rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
| -rw-r--r-- | src/include/catalog/pg_class.h | 24 | ||||
| -rw-r--r-- | src/include/catalog/pg_control.h | 4 | ||||
| -rw-r--r-- | src/include/catalog/pg_database.h | 10 | ||||
| -rw-r--r-- | src/include/catalog/pg_proc.h | 2 |
5 files changed, 27 insertions, 15 deletions
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index a676793566d..4b8fa0175b3 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201301211 +#define CATALOG_VERSION_NO 201301231 #endif diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index fcc293899ab..820552f0134 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -67,6 +67,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO bool relhastriggers; /* has (or has had) any TRIGGERs */ bool relhassubclass; /* has (or has had) derived classes */ TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */ + TransactionId relminmxid; /* all multixacts in this rel are >= this. + * this is really a MultiXactId */ #ifdef CATALOG_VARLEN /* variable-length fields start here */ /* NOTE: These fields are not present in a relcache entry's rd_rel field. */ @@ -77,7 +79,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO /* Size of fixed part of pg_class tuples, not counting var-length fields */ #define CLASS_TUPLE_SIZE \ - (offsetof(FormData_pg_class,relfrozenxid) + sizeof(TransactionId)) + (offsetof(FormData_pg_class,relminmxid) + sizeof(TransactionId)) /* ---------------- * Form_pg_class corresponds to a pointer to a tuple with @@ -91,7 +93,7 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 27 +#define Natts_pg_class 28 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -117,8 +119,9 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_relhastriggers 23 #define Anum_pg_class_relhassubclass 24 #define Anum_pg_class_relfrozenxid 25 -#define Anum_pg_class_relacl 26 -#define Anum_pg_class_reloptions 27 +#define Anum_pg_class_relminmxid 26 +#define Anum_pg_class_relacl 27 +#define Anum_pg_class_reloptions 28 /* ---------------- * initial contents of pg_class @@ -129,14 +132,17 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -/* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 _null_ _null_ )); +/* + * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId; + * similarly, "1" in relminmxid stands for FirstMultiXactId + */ +DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 1 _null_ _null_ )); DESCR(""); diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index ead3a6e4bad..e4a9abe7bc5 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -21,7 +21,7 @@ /* Version identifier for this pg_control format */ -#define PG_CONTROL_VERSION 932 +#define PG_CONTROL_VERSION 933 /* * Body of CheckPoint XLOG records. This is declared here because we keep @@ -41,6 +41,8 @@ typedef struct CheckPoint MultiXactOffset nextMultiOffset; /* next free MultiXact offset */ TransactionId oldestXid; /* cluster-wide minimum datfrozenxid */ Oid oldestXidDB; /* database with minimum datfrozenxid */ + MultiXactId oldestMulti; /* cluster-wide minimum datminmxid */ + Oid oldestMultiDB; /* database with minimum datminmxid */ pg_time_t time; /* time stamp of checkpoint */ /* diff --git a/src/include/catalog/pg_database.h b/src/include/catalog/pg_database.h index 4010959b029..baeddcd12a1 100644 --- a/src/include/catalog/pg_database.h +++ b/src/include/catalog/pg_database.h @@ -41,6 +41,7 @@ CATALOG(pg_database,1262) BKI_SHARED_RELATION BKI_ROWTYPE_OID(1248) BKI_SCHEMA_M int32 datconnlimit; /* max connections allowed (-1=no limit) */ Oid datlastsysoid; /* highest OID to consider a system OID */ TransactionId datfrozenxid; /* all Xids < this are frozen in this DB */ + TransactionId datminmxid; /* all multixacts in the DB are >= this */ Oid dattablespace; /* default table space for this DB */ #ifdef CATALOG_VARLEN /* variable-length fields start here */ @@ -59,7 +60,7 @@ typedef FormData_pg_database *Form_pg_database; * compiler constants for pg_database * ---------------- */ -#define Natts_pg_database 12 +#define Natts_pg_database 13 #define Anum_pg_database_datname 1 #define Anum_pg_database_datdba 2 #define Anum_pg_database_encoding 3 @@ -70,10 +71,11 @@ typedef FormData_pg_database *Form_pg_database; #define Anum_pg_database_datconnlimit 8 #define Anum_pg_database_datlastsysoid 9 #define Anum_pg_database_datfrozenxid 10 -#define Anum_pg_database_dattablespace 11 -#define Anum_pg_database_datacl 12 +#define Anum_pg_database_datminmxid 11 +#define Anum_pg_database_dattablespace 12 +#define Anum_pg_database_datacl 13 -DATA(insert OID = 1 ( template1 PGUID ENCODING "LC_COLLATE" "LC_CTYPE" t t -1 0 0 1663 _null_)); +DATA(insert OID = 1 ( template1 PGUID ENCODING "LC_COLLATE" "LC_CTYPE" t t -1 0 0 1 1663 _null_)); SHDESCR("default template for new databases"); #define TemplateDbOid 1 diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 010605d774c..028e1684ff0 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -2909,6 +2909,8 @@ DATA(insert OID = 1371 ( pg_lock_status PGNSP PGUID 12 1 1000 0 0 f f f f t t DESCR("view system lock information"); DATA(insert OID = 1065 ( pg_prepared_xact PGNSP PGUID 12 1 1000 0 0 f f f f t t v 0 0 2249 "" "{28,25,1184,26,26}" "{o,o,o,o,o}" "{transaction,gid,prepared,ownerid,dbid}" _null_ pg_prepared_xact _null_ _null_ _null_ )); DESCR("view two-phase transactions"); +DATA(insert OID = 3819 ( pg_get_multixact_members PGNSP PGUID 12 1 1000 0 0 f f f f t t v 1 0 2249 "28" "{28,28,25}" "{i,o,o}" "{multixid,xid,mode}" _null_ pg_get_multixact_members _null_ _null_ _null_ )); +DESCR("view members of a multixactid"); DATA(insert OID = 3537 ( pg_describe_object PGNSP PGUID 12 1 0 0 0 f f f f t f s 3 0 25 "26 26 23" _null_ _null_ _null_ _null_ pg_describe_object _null_ _null_ _null_ )); DESCR("get identification of SQL object"); |
