From 5865150b6d535ecea241e9ad3038564cb7768b9a Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Wed, 27 Aug 2025 19:12:11 -0400 Subject: aio: Stop using enum bitfields due to bad code generation During an investigation into rather odd aio-related errors on macOS, observed by Alexander and Konstantin, we started to wonder if bitfield access is related to the error. At the moment it looks like it is related, we cannot reproduce the failures when replacing the bitfields. In addition, the problem can only be reproduced with some compiler [versions] and not everyone has been able to reproduce the issue. The observed problem is that, very rarely, PgAioHandle->{state,target} are in an inconsistent state, after having been checked to be in a valid state not long before, triggering an assertion failure. Unfortunately, this could be caused by wrong compiler code generation or somehow by missing memory barriers - we don't really know. In theory there should not be any concurrent write access to the handle in the state the bug is triggered, as the handle was idle and is just being initialized. Separately from the bug, we observed that at least gcc and clang generate rather terrible code for the bitfield access. Even if it's not clear whether the observed assertion failure is actually caused by the bitfield somehow, the bad code generation alone is sufficient reason to stop using bitfields. Therefore, replace the enum bitfields with uint8s and instead cast in each switch statement. 
Reported-by: Alexander Lakhin Reported-by: Konstantin Knizhnik Discussion: https://postgr.es/m/1500090.1745443021@sss.pgh.pa.us Backpatch-through: 18 --- src/backend/storage/aio/aio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/backend/storage/aio/aio.c') diff --git a/src/backend/storage/aio/aio.c b/src/backend/storage/aio/aio.c index 3643f27ad6e..87d7136a936 100644 --- a/src/backend/storage/aio/aio.c +++ b/src/backend/storage/aio/aio.c @@ -275,7 +275,7 @@ pgaio_io_release_resowner(dlist_node *ioh_node, bool on_error) ResourceOwnerForgetAioHandle(ioh->resowner, &ioh->resowner_node); ioh->resowner = NULL; - switch (ioh->state) + switch ((PgAioHandleState) ioh->state) { case PGAIO_HS_IDLE: elog(ERROR, "unexpected"); @@ -600,7 +600,7 @@ pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation) if (pgaio_io_was_recycled(ioh, ref_generation, &state)) return; - switch (state) + switch ((PgAioHandleState) state) { case PGAIO_HS_IDLE: case PGAIO_HS_HANDED_OUT: @@ -825,7 +825,7 @@ pgaio_io_wait_for_free(void) &pgaio_my_backend->in_flight_ios); uint64 generation = ioh->generation; - switch (ioh->state) + switch ((PgAioHandleState) ioh->state) { /* should not be in in-flight list */ case PGAIO_HS_IDLE: @@ -905,7 +905,7 @@ static const char * pgaio_io_state_get_name(PgAioHandleState s) { #define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym - switch (s) + switch ((PgAioHandleState) s) { PGAIO_HS_TOSTR_CASE(IDLE); PGAIO_HS_TOSTR_CASE(HANDED_OUT); @@ -930,7 +930,7 @@ pgaio_io_get_state_name(PgAioHandle *ioh) const char * pgaio_result_status_string(PgAioResultStatus rs) { - switch (rs) + switch ((PgAioResultStatus) rs) { case PGAIO_RS_UNKNOWN: return "UNKNOWN"; -- cgit v1.2.3