| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
 | /*-------------------------------------------------------------------------
 *
 * lwlock.c
 *	  Lightweight lock manager
 *
 * Lightweight locks are intended primarily to provide mutual exclusion of
 * access to shared-memory data structures.  Therefore, they offer both
 * exclusive and shared lock modes (to support read/write and read-only
 * access to a shared object).	There are few other frammishes.  User-level
 * locking should be done with the full lock manager --- which depends on
 * an LWLock to protect its shared state.
 *
 *
 * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  $Header: /cvsroot/pgsql/src/backend/storage/lmgr/lwlock.c,v 1.14 2002/09/25 20:31:40 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"
#include "access/clog.h"
#include "storage/lwlock.h"
#include "storage/proc.h"
#include "storage/spin.h"
typedef struct LWLock
{
	slock_t		mutex;			/* Protects LWLock and queue of PGPROCs */
	bool		releaseOK;		/* T if ok to release waiters */
	char		exclusive;		/* # of exclusive holders (0 or 1) */
	int			shared;			/* # of shared holders (0..MaxBackends) */
	PGPROC	   *head;			/* head of list of waiting PGPROCs */
	PGPROC	   *tail;			/* tail of list of waiting PGPROCs */
	/* tail is undefined when head is NULL */
} LWLock;
/*
 * This points to the array of LWLocks in shared memory.  Backends inherit
 * the pointer by fork from the postmaster.  LWLockIds are indexes into
 * the array.
 */
static LWLock *LWLockArray = NULL;
/* shared counter for dynamic allocation of LWLockIds */
static int *LWLockCounter;
/*
 * We use this structure to keep track of locked LWLocks for release
 * during error recovery.  The maximum size could be determined at runtime
 * if necessary, but it seems unlikely that more than a few locks could
 * ever be held simultaneously.
 */
#define MAX_SIMUL_LWLOCKS	100
static int	num_held_lwlocks = 0;
static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS];
#ifdef LOCK_DEBUG
bool		Trace_lwlocks = false;
inline static void
PRINT_LWDEBUG(const char *where, LWLockId lockid, const volatile LWLock *lock)
{
	if (Trace_lwlocks)
		elog(LOG, "%s(%d): excl %d shared %d head %p rOK %d",
			 where, (int) lockid,
			 (int) lock->exclusive, lock->shared, lock->head,
			 (int) lock->releaseOK);
}
inline static void
LOG_LWDEBUG(const char *where, LWLockId lockid, const char *msg)
{
	if (Trace_lwlocks)
		elog(LOG, "%s(%d): %s", where, (int) lockid, msg);
}
#else							/* not LOCK_DEBUG */
#define PRINT_LWDEBUG(a,b,c)
#define LOG_LWDEBUG(a,b,c)
#endif   /* LOCK_DEBUG */
/*
 * Compute number of LWLocks to allocate.
 */
int
NumLWLocks(void)
{
	int			numLocks;
	/*
	 * Possibly this logic should be spread out among the affected
	 * modules, the same way that shmem space estimation is done.  But for
	 * now, there are few enough users of LWLocks that we can get away
	 * with just keeping the knowledge here.
	 */
	/* Predefined LWLocks */
	numLocks = (int) NumFixedLWLocks;
	/* bufmgr.c needs two for each shared buffer */
	numLocks += 2 * NBuffers;
	/* clog.c needs one per CLOG buffer */
	numLocks += NUM_CLOG_BUFFERS;
	/* Perhaps create a few more for use by user-defined modules? */
	return numLocks;
}
/*
 * Compute shmem space needed for LWLocks.
 */
int
LWLockShmemSize(void)
{
	int			numLocks = NumLWLocks();
	uint32		spaceLocks;
	/* Allocate the LWLocks plus space for shared allocation counter. */
	spaceLocks = numLocks * sizeof(LWLock) + 2 * sizeof(int);
	spaceLocks = MAXALIGN(spaceLocks);
	return (int) spaceLocks;
}
/*
 * Allocate shmem space for LWLocks and initialize the locks.
 */
void
CreateLWLocks(void)
{
	int			numLocks = NumLWLocks();
	uint32		spaceLocks = LWLockShmemSize();
	LWLock	   *lock;
	int			id;
	/* Allocate space */
	LWLockArray = (LWLock *) ShmemAlloc(spaceLocks);
	/*
	 * Initialize all LWLocks to "unlocked" state
	 */
	for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++)
	{
		SpinLockInit(&lock->mutex);
		lock->releaseOK = true;
		lock->exclusive = 0;
		lock->shared = 0;
		lock->head = NULL;
		lock->tail = NULL;
	}
	/*
	 * Initialize the dynamic-allocation counter at the end of the array
	 */
	LWLockCounter = (int *) lock;
	LWLockCounter[0] = (int) NumFixedLWLocks;
	LWLockCounter[1] = numLocks;
}
/*
 * LWLockAssign - assign a dynamically-allocated LWLock number
 *
 * NB: we do not currently try to interlock this.  Could perhaps use
 * ShmemLock spinlock if there were any need to assign LWLockIds after
 * shmem setup.
 */
LWLockId
LWLockAssign(void)
{
	if (LWLockCounter[0] >= LWLockCounter[1])
		elog(FATAL, "No more LWLockIds available");
	return (LWLockId) (LWLockCounter[0]++);
}
/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
void
LWLockAcquire(LWLockId lockid, LWLockMode mode)
{
	volatile LWLock *lock = LWLockArray + lockid;
	PGPROC	   *proc = MyProc;
	bool		retry = false;
	int			extraWaits = 0;
	PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
	/*
	 * We can't wait if we haven't got a PGPROC.  This should only occur
	 * during bootstrap or shared memory initialization.  Put an Assert
	 * here to catch unsafe coding practices.
	 */
	Assert(!(proc == NULL && IsUnderPostmaster));
	/*
	 * Lock out cancel/die interrupts until we exit the code section
	 * protected by the LWLock.  This ensures that interrupts will not
	 * interfere with manipulations of data structures in shared memory.
	 */
	HOLD_INTERRUPTS();
	/*
	 * Loop here to try to acquire lock after each time we are signaled by
	 * LWLockRelease.
	 *
	 * NOTE: it might seem better to have LWLockRelease actually grant us the
	 * lock, rather than retrying and possibly having to go back to sleep.
	 * But in practice that is no good because it means a process swap for
	 * every lock acquisition when two or more processes are contending
	 * for the same lock.  Since LWLocks are normally used to protect
	 * not-very-long sections of computation, a process needs to be able
	 * to acquire and release the same lock many times during a single CPU
	 * time slice, even in the presence of contention.	The efficiency of
	 * being able to do that outweighs the inefficiency of sometimes
	 * wasting a process dispatch cycle because the lock is not free when
	 * a released waiter finally gets to run.  See pgsql-hackers archives
	 * for 29-Dec-01.
	 */
	for (;;)
	{
		bool		mustwait;
		/* Acquire mutex.  Time spent holding mutex should be short! */
		SpinLockAcquire_NoHoldoff(&lock->mutex);
		/* If retrying, allow LWLockRelease to release waiters again */
		if (retry)
			lock->releaseOK = true;
		/* If I can get the lock, do so quickly. */
		if (mode == LW_EXCLUSIVE)
		{
			if (lock->exclusive == 0 && lock->shared == 0)
			{
				lock->exclusive++;
				mustwait = false;
			}
			else
				mustwait = true;
		}
		else
		{
			if (lock->exclusive == 0)
			{
				lock->shared++;
				mustwait = false;
			}
			else
				mustwait = true;
		}
		if (!mustwait)
			break;				/* got the lock */
		/*
		 * Add myself to wait queue.
		 *
		 * If we don't have a PGPROC structure, there's no way to wait. This
		 * should never occur, since MyProc should only be null during
		 * shared memory initialization.
		 */
		if (proc == NULL)
			elog(FATAL, "LWLockAcquire: can't wait without a PGPROC structure");
		proc->lwWaiting = true;
		proc->lwExclusive = (mode == LW_EXCLUSIVE);
		proc->lwWaitLink = NULL;
		if (lock->head == NULL)
			lock->head = proc;
		else
			lock->tail->lwWaitLink = proc;
		lock->tail = proc;
		/* Can release the mutex now */
		SpinLockRelease_NoHoldoff(&lock->mutex);
		/*
		 * Wait until awakened.
		 *
		 * Since we share the process wait semaphore with the regular lock
		 * manager and ProcWaitForSignal, and we may need to acquire an
		 * LWLock while one of those is pending, it is possible that we
		 * get awakened for a reason other than being signaled by
		 * LWLockRelease. If so, loop back and wait again.	Once we've
		 * gotten the LWLock, re-increment the sema by the number of
		 * additional signals received, so that the lock manager or signal
		 * manager will see the received signal when it next waits.
		 */
		LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");
		for (;;)
		{
			/* "false" means cannot accept cancel/die interrupt here. */
			PGSemaphoreLock(&proc->sem, false);
			if (!proc->lwWaiting)
				break;
			extraWaits++;
		}
		LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");
		/* Now loop back and try to acquire lock again. */
		retry = true;
	}
	/* We are done updating shared state of the lock itself. */
	SpinLockRelease_NoHoldoff(&lock->mutex);
	/* Add lock to list of locks held by this backend */
	Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
	held_lwlocks[num_held_lwlocks++] = lockid;
	/*
	 * Fix the process wait semaphore's count for any absorbed wakeups.
	 */
	while (extraWaits-- > 0)
		PGSemaphoreUnlock(&proc->sem);
}
/*
 * LWLockConditionalAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, return FALSE with no side-effects.
 *
 * If successful, cancel/die interrupts are held off until lock release.
 */
bool
LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
{
	volatile LWLock *lock = LWLockArray + lockid;
	bool		mustwait;
	PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
	/*
	 * Lock out cancel/die interrupts until we exit the code section
	 * protected by the LWLock.  This ensures that interrupts will not
	 * interfere with manipulations of data structures in shared memory.
	 */
	HOLD_INTERRUPTS();
	/* Acquire mutex.  Time spent holding mutex should be short! */
	SpinLockAcquire_NoHoldoff(&lock->mutex);
	/* If I can get the lock, do so quickly. */
	if (mode == LW_EXCLUSIVE)
	{
		if (lock->exclusive == 0 && lock->shared == 0)
		{
			lock->exclusive++;
			mustwait = false;
		}
		else
			mustwait = true;
	}
	else
	{
		if (lock->exclusive == 0)
		{
			lock->shared++;
			mustwait = false;
		}
		else
			mustwait = true;
	}
	/* We are done updating shared state of the lock itself. */
	SpinLockRelease_NoHoldoff(&lock->mutex);
	if (mustwait)
	{
		/* Failed to get lock, so release interrupt holdoff */
		RESUME_INTERRUPTS();
		LOG_LWDEBUG("LWLockConditionalAcquire", lockid, "failed");
	}
	else
	{
		/* Add lock to list of locks held by this backend */
		Assert(num_held_lwlocks < MAX_SIMUL_LWLOCKS);
		held_lwlocks[num_held_lwlocks++] = lockid;
	}
	return !mustwait;
}
/*
 * LWLockRelease - release a previously acquired lock
 */
void
LWLockRelease(LWLockId lockid)
{
	volatile LWLock *lock = LWLockArray + lockid;
	PGPROC	   *head;
	PGPROC	   *proc;
	int			i;
	PRINT_LWDEBUG("LWLockRelease", lockid, lock);
	/*
	 * Remove lock from list of locks held.  Usually, but not always, it
	 * will be the latest-acquired lock; so search array backwards.
	 */
	for (i = num_held_lwlocks; --i >= 0;)
	{
		if (lockid == held_lwlocks[i])
			break;
	}
	if (i < 0)
		elog(ERROR, "LWLockRelease: lock %d is not held", (int) lockid);
	num_held_lwlocks--;
	for (; i < num_held_lwlocks; i++)
		held_lwlocks[i] = held_lwlocks[i + 1];
	/* Acquire mutex.  Time spent holding mutex should be short! */
	SpinLockAcquire_NoHoldoff(&lock->mutex);
	/* Release my hold on lock */
	if (lock->exclusive > 0)
		lock->exclusive--;
	else
	{
		Assert(lock->shared > 0);
		lock->shared--;
	}
	/*
	 * See if I need to awaken any waiters.  If I released a non-last
	 * shared hold, there cannot be anything to do.  Also, do not awaken
	 * any waiters if someone has already awakened waiters that haven't
	 * yet acquired the lock.
	 */
	head = lock->head;
	if (head != NULL)
	{
		if (lock->exclusive == 0 && lock->shared == 0 && lock->releaseOK)
		{
			/*
			 * Remove the to-be-awakened PGPROCs from the queue.  If the
			 * front waiter wants exclusive lock, awaken him only.
			 * Otherwise awaken as many waiters as want shared access.
			 */
			proc = head;
			if (!proc->lwExclusive)
			{
				while (proc->lwWaitLink != NULL &&
					   !proc->lwWaitLink->lwExclusive)
					proc = proc->lwWaitLink;
			}
			/* proc is now the last PGPROC to be released */
			lock->head = proc->lwWaitLink;
			proc->lwWaitLink = NULL;
			/* prevent additional wakeups until retryer gets to run */
			lock->releaseOK = false;
		}
		else
		{
			/* lock is still held, can't awaken anything */
			head = NULL;
		}
	}
	/* We are done updating shared state of the lock itself. */
	SpinLockRelease_NoHoldoff(&lock->mutex);
	/*
	 * Awaken any waiters I removed from the queue.
	 */
	while (head != NULL)
	{
		LOG_LWDEBUG("LWLockRelease", lockid, "release waiter");
		proc = head;
		head = proc->lwWaitLink;
		proc->lwWaitLink = NULL;
		proc->lwWaiting = false;
		PGSemaphoreUnlock(&proc->sem);
	}
	/*
	 * Now okay to allow cancel/die interrupts.
	 */
	RESUME_INTERRUPTS();
}
/*
 * LWLockReleaseAll - release all currently-held locks
 *
 * Used to clean up after elog(ERROR).	An important difference between this
 * function and retail LWLockRelease calls is that InterruptHoldoffCount is
 * unchanged by this operation.  This is necessary since InterruptHoldoffCount
 * has been set to an appropriate level earlier in error recovery.	We could
 * decrement it below zero if we allow it to drop for each released lock!
 */
void
LWLockReleaseAll(void)
{
	while (num_held_lwlocks > 0)
	{
		HOLD_INTERRUPTS();		/* match the upcoming RESUME_INTERRUPTS */
		LWLockRelease(held_lwlocks[num_held_lwlocks - 1]);
	}
}
 |