From 1c6821be31f91ab92547a8ed4246762c8cefb1b3 Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas
Date: Mon, 30 Jun 2014 10:13:48 +0300
Subject: Fix and enhance the assertion of no palloc's in a critical section.

The assertion failed if WAL_DEBUG or LWLOCK_STATS was enabled; fix that by
using separate memory contexts for the allocations made within those code
blocks.

This patch introduces a mechanism for marking any memory context as allowed
in a critical section. Previously ErrorContext was exempt as a special case.

Instead of a blanket exception of the checkpointer process, only exempt the
memory context used for the pending ops hash table.
---
 src/backend/postmaster/checkpointer.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'src/backend/postmaster/checkpointer.c')

diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 2ac3061d974..6c814ba0be8 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1305,19 +1305,6 @@ AbsorbFsyncRequests(void)
 	if (!AmCheckpointerProcess())
 		return;
 
-	/*
-	 * We have to PANIC if we fail to absorb all the pending requests (eg,
-	 * because our hashtable runs out of memory). This is because the system
-	 * cannot run safely if we are unable to fsync what we have been told to
-	 * fsync. Fortunately, the hashtable is so small that the problem is
-	 * quite unlikely to arise in practice.
-	 */
-	START_CRIT_SECTION();
-
-	/*
-	 * We try to avoid holding the lock for a long time by copying the request
-	 * array.
-	 */
 	LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
 
 	/* Transfer stats counts into pending pgstats message */
@@ -1327,12 +1314,25 @@ AbsorbFsyncRequests(void)
 	CheckpointerShmem->num_backend_writes = 0;
 	CheckpointerShmem->num_backend_fsync = 0;
 
+	/*
+	 * We try to avoid holding the lock for a long time by copying the request
+	 * array, and processing the requests after releasing the lock.
+	 *
+	 * Once we have cleared the requests from shared memory, we have to PANIC
+	 * if we then fail to absorb them (eg, because our hashtable runs out of
+	 * memory). This is because the system cannot run safely if we are unable
+	 * to fsync what we have been told to fsync. Fortunately, the hashtable
+	 * is so small that the problem is quite unlikely to arise in practice.
+	 */
 	n = CheckpointerShmem->num_requests;
 	if (n > 0)
 	{
 		requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
 		memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
 	}
+
+	START_CRIT_SECTION();
+
 	CheckpointerShmem->num_requests = 0;
 
 	LWLockRelease(CheckpointerCommLock);
@@ -1340,10 +1340,10 @@ AbsorbFsyncRequests(void)
 	for (request = requests; n > 0; request++, n--)
 		RememberFsyncRequest(request->rnode, request->forknum, request->segno);
 
+	END_CRIT_SECTION();
+
 	if (requests)
 		pfree(requests);
-
-	END_CRIT_SECTION();
 }
 
 /*
-- 
cgit v1.2.3