diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index dbf8030f7c..1e2d192f11 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -116,7 +116,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
* possible deadlocks.
*/
for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
- LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);
+ LWLockAcquire(BufMappingPartitionLockByIndex(i), LW_SHARED);
/*
* Scan though all the buffers, saving the relevant fields in the
@@ -157,7 +157,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
* avoids O(N^2) behavior inside LWLockRelease.
*/
for (i = NUM_BUFFER_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstBufMappingLock + i);
+ LWLockRelease(BufMappingPartitionLockByIndex(i));
}
funcctx = SRF_PERCALL_SETUP();
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 2f069b768e..858cce3457 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -150,7 +150,7 @@ typedef struct pgssEntry
*/
typedef struct pgssSharedState
{
- LWLockId lock; /* protects hashtable search/modification */
+ LWLock *lock; /* protects hashtable search/modification */
int query_size; /* max query length in bytes */
double cur_median_usage; /* current median usage in hashtable */
} pgssSharedState;
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981ab8..82eaf89a6b 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2212,49 +2212,55 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
lwlock-acquire
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock has been acquired.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-release
- (LWLockId)
+ (char *, int)
Probe that fires when an LWLock has been released (but note
that any released waiters have not yet been awakened).
- arg0 is the LWLock's ID.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
lwlock-wait-start
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock was not immediately available and
a server process has begun to wait for the lock to become available.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-wait-done
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when a server process has been released from its
wait for an LWLock (it does not actually have the lock yet).
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-condacquire
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock was successfully acquired when the
caller specified no waiting.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-condacquire-fail
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock was not successfully acquired when
the caller specified no waiting.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lock-wait-start
@@ -2299,10 +2305,6 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
LocalTransactionId
unsigned int
-
- LWLockId
- int
-
LWLockMode
int
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index f604aa9b60..b90db9a417 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -151,7 +151,7 @@ SimpleLruShmemSize(int nslots, int nlsns)
sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
- sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */
+ sz += MAXALIGN(nslots * sizeof(LWLock *)); /* buffer_locks[] */
if (nlsns > 0)
sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
@@ -161,7 +161,7 @@ SimpleLruShmemSize(int nslots, int nlsns)
void
SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
- LWLockId ctllock, const char *subdir)
+ LWLock *ctllock, const char *subdir)
{
SlruShared shared;
bool found;
@@ -202,8 +202,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
offset += MAXALIGN(nslots * sizeof(int));
shared->page_lru_count = (int *) (ptr + offset);
offset += MAXALIGN(nslots * sizeof(int));
- shared->buffer_locks = (LWLockId *) (ptr + offset);
- offset += MAXALIGN(nslots * sizeof(LWLockId));
+ shared->buffer_locks = (LWLock **) (ptr + offset);
+ offset += MAXALIGN(nslots * sizeof(LWLock *));
if (nlsns > 0)
{
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b807b064be..52f87895a6 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -448,8 +448,6 @@ typedef struct
typedef int InheritableSocket;
#endif
-typedef struct LWLock LWLock; /* ugly kluge */
-
/*
* Structure contains all variables passed to exec:ed backends
*/
@@ -473,7 +471,7 @@ typedef struct
#ifndef HAVE_SPINLOCKS
PGSemaphore SpinlockSemaArray;
#endif
- LWLock *LWLockArray;
+ LWLock *MainLWLockArray;
slock_t *ProcStructLock;
PROC_HDR *ProcGlobal;
PGPROC *AuxiliaryProcs;
@@ -5576,7 +5574,6 @@ PostmasterMarkPIDForWorkerNotify(int pid)
* functions. They are marked NON_EXEC_STATIC in their home modules.
*/
extern slock_t *ShmemLock;
-extern LWLock *LWLockArray;
extern slock_t *ProcStructLock;
extern PGPROC *AuxiliaryProcs;
extern PMSignalData *PMSignalState;
@@ -5625,7 +5622,7 @@ save_backend_variables(BackendParameters *param, Port *port,
#ifndef HAVE_SPINLOCKS
param->SpinlockSemaArray = SpinlockSemaArray;
#endif
- param->LWLockArray = LWLockArray;
+ param->MainLWLockArray = MainLWLockArray;
param->ProcStructLock = ProcStructLock;
param->ProcGlobal = ProcGlobal;
param->AuxiliaryProcs = AuxiliaryProcs;
@@ -5856,7 +5853,7 @@ restore_backend_variables(BackendParameters *param, Port *port)
#ifndef HAVE_SPINLOCKS
SpinlockSemaArray = param->SpinlockSemaArray;
#endif
- LWLockArray = param->LWLockArray;
+ MainLWLockArray = param->MainLWLockArray;
ProcStructLock = param->ProcStructLock;
ProcGlobal = param->ProcGlobal;
AuxiliaryProcs = param->AuxiliaryProcs;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 91f0c7eb36..19eecab4c2 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -146,7 +146,7 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
{
BufferTag newTag; /* identity of requested block */
uint32 newHash; /* hash value for newTag */
- LWLockId newPartitionLock; /* buffer partition lock for it */
+ LWLock *newPartitionLock; /* buffer partition lock for it */
int buf_id;
/* create a tag so we can lookup the buffer */
@@ -539,10 +539,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
{
BufferTag newTag; /* identity of requested block */
uint32 newHash; /* hash value for newTag */
- LWLockId newPartitionLock; /* buffer partition lock for it */
+ LWLock *newPartitionLock; /* buffer partition lock for it */
BufferTag oldTag; /* previous identity of selected buffer */
uint32 oldHash; /* hash value for oldTag */
- LWLockId oldPartitionLock; /* buffer partition lock for it */
+ LWLock *oldPartitionLock; /* buffer partition lock for it */
BufFlags oldFlags;
int buf_id;
volatile BufferDesc *buf;
@@ -891,7 +891,7 @@ InvalidateBuffer(volatile BufferDesc *buf)
{
BufferTag oldTag;
uint32 oldHash; /* hash value for oldTag */
- LWLockId oldPartitionLock; /* buffer partition lock for it */
+ LWLock *oldPartitionLock; /* buffer partition lock for it */
BufFlags oldFlags;
/* Save the original buffer tag before dropping the spinlock */
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index cc21923709..2e717457b1 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -182,8 +182,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
* Now initialize LWLocks, which do shared memory allocation and are
* needed for InitShmemIndex.
*/
- if (!IsUnderPostmaster)
- CreateLWLocks();
+ CreateLWLocks();
/*
* Set up shmem.c index hashtable
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 5c8b4b0656..6335129ac2 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -565,7 +565,7 @@ LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
LOCALLOCK *locallock;
LOCK *lock;
PROCLOCK *proclock;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool hasWaiters = false;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -702,7 +702,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
bool found;
ResourceOwner owner;
uint32 hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
int status;
bool log_lock = false;
@@ -1744,7 +1744,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
LOCALLOCK *locallock;
LOCK *lock;
PROCLOCK *proclock;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool wakeupNeeded;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -2096,10 +2096,12 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
*/
for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
{
- LWLockId partitionLock = FirstLockMgrLock + partition;
+ LWLock *partitionLock;
SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
PROCLOCK *nextplock;
+ partitionLock = LockHashPartitionLockByIndex(partition);
+
/*
* If the proclock list for this partition is empty, we can skip
* acquiring the partition lock. This optimization is trickier than
@@ -2475,7 +2477,7 @@ static bool
FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag,
uint32 hashcode)
{
- LWLockId partitionLock = LockHashPartitionLock(hashcode);
+ LWLock *partitionLock = LockHashPartitionLock(hashcode);
Oid relid = locktag->locktag_field2;
uint32 i;
@@ -2565,7 +2567,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD];
LOCKTAG *locktag = &locallock->tag.lock;
PROCLOCK *proclock = NULL;
- LWLockId partitionLock = LockHashPartitionLock(locallock->hashcode);
+ LWLock *partitionLock = LockHashPartitionLock(locallock->hashcode);
Oid relid = locktag->locktag_field2;
uint32 f;
@@ -2671,7 +2673,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
SHM_QUEUE *procLocks;
PROCLOCK *proclock;
uint32 hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
int count = 0;
int fast_count = 0;
@@ -2883,7 +2885,7 @@ LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc,
PROCLOCKTAG proclocktag;
uint32 hashcode;
uint32 proclock_hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool wakeupNeeded;
hashcode = LockTagHashCode(locktag);
@@ -3159,10 +3161,12 @@ PostPrepare_Locks(TransactionId xid)
*/
for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
{
- LWLockId partitionLock = FirstLockMgrLock + partition;
+ LWLock *partitionLock;
SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
PROCLOCK *nextplock;
+ partitionLock = LockHashPartitionLockByIndex(partition);
+
/*
* If the proclock list for this partition is empty, we can skip
* acquiring the partition lock. This optimization is safer than the
@@ -3400,7 +3404,7 @@ GetLockStatusData(void)
* Must grab LWLocks in partition-number order to avoid LWLock deadlock.
*/
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
- LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(LockHashPartitionLockByIndex(i), LW_SHARED);
/* Now we can safely count the number of proclocks */
data->nelements = el + hash_get_num_entries(LockMethodProcLockHash);
@@ -3442,7 +3446,7 @@ GetLockStatusData(void)
* behavior inside LWLockRelease.
*/
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstLockMgrLock + i);
+ LWLockRelease(LockHashPartitionLockByIndex(i));
Assert(el == data->nelements);
@@ -3477,7 +3481,7 @@ GetRunningTransactionLocks(int *nlocks)
* Must grab LWLocks in partition-number order to avoid LWLock deadlock.
*/
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
- LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(LockHashPartitionLockByIndex(i), LW_SHARED);
/* Now we can safely count the number of proclocks */
els = hash_get_num_entries(LockMethodProcLockHash);
@@ -3537,7 +3541,7 @@ GetRunningTransactionLocks(int *nlocks)
* behavior inside LWLockRelease.
*/
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstLockMgrLock + i);
+ LWLockRelease(LockHashPartitionLockByIndex(i));
*nlocks = index;
return accessExclusiveLocks;
@@ -3673,7 +3677,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
uint32 hashcode;
uint32 proclock_hashcode;
int partition;
- LWLockId partitionLock;
+ LWLock *partitionLock;
LockMethod lockMethodTable;
Assert(len == sizeof(TwoPhaseLockRecord));
@@ -4044,7 +4048,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
{
PROCLOCK *proclock;
uint32 hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
hashcode = LockTagHashCode(&tag);
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 0e319a7e6a..55d9d7837c 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -31,50 +31,37 @@
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/spin.h"
+#include "utils/memutils.h"
+
+#ifdef LWLOCK_STATS
+#include "utils/hsearch.h"
+#endif
/* We use the ShmemLock spinlock to protect LWLockAssign */
extern slock_t *ShmemLock;
-
-typedef struct LWLock
-{
- slock_t mutex; /* Protects LWLock and queue of PGPROCs */
- bool releaseOK; /* T if ok to release waiters */
- char exclusive; /* # of exclusive holders (0 or 1) */
- int shared; /* # of shared holders (0..MaxBackends) */
- PGPROC *head; /* head of list of waiting PGPROCs */
- PGPROC *tail; /* tail of list of waiting PGPROCs */
- /* tail is undefined when head is NULL */
-} LWLock;
-
/*
- * All the LWLock structs are allocated as an array in shared memory.
- * (LWLockIds are indexes into the array.) We force the array stride to
- * be a power of 2, which saves a few cycles in indexing, but more
- * importantly also ensures that individual LWLocks don't cross cache line
- * boundaries. This reduces cache contention problems, especially on AMD
- * Opterons. (Of course, we have to also ensure that the array start
- * address is suitably aligned.)
- *
- * LWLock is between 16 and 32 bytes on all known platforms, so these two
- * cases are sufficient.
+ * This is indexed by tranche ID and stores metadata for all tranches known
+ * to the current backend.
*/
-#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 16 ? 16 : 32)
+static LWLockTranche **LWLockTrancheArray = NULL;
+static int LWLockTranchesAllocated = 0;
-typedef union LWLockPadded
-{
- LWLock lock;
- char pad[LWLOCK_PADDED_SIZE];
-} LWLockPadded;
+#define T_NAME(lock) \
+ (LWLockTrancheArray[(lock)->tranche]->name)
+#define T_ID(lock) \
+ ((int) ((((char *) lock) - \
+ ((char *) LWLockTrancheArray[(lock)->tranche]->array_base)) / \
+ LWLockTrancheArray[(lock)->tranche]->array_stride))
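+/*
+ * For a lock in the main array, T_ID() reduces to the lock's index in that
+ * array: the main tranche's array_base is MainLWLockArray and its
+ * array_stride is sizeof(LWLockPadded), so e.g. T_ID(ProcArrayLock) is 4.
+ */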
/*
- * This points to the array of LWLocks in shared memory. Backends inherit
+ * This points to the main array of LWLocks in shared memory. Backends inherit
* the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
* where we have special measures to pass it down).
*/
-NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL;
-
+LWLockPadded *MainLWLockArray = NULL;
+static LWLockTranche MainLWLockTranche;
/*
* We use this structure to keep track of locked LWLocks for release
@@ -85,58 +72,78 @@ NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL;
#define MAX_SIMUL_LWLOCKS 100
static int num_held_lwlocks = 0;
-static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS];
+static LWLock *held_lwlocks[MAX_SIMUL_LWLOCKS];
static int lock_addin_request = 0;
static bool lock_addin_request_allowed = true;
#ifdef LWLOCK_STATS
+typedef struct lwlock_stats_key
+{
+ int tranche;
+ int instance;
+} lwlock_stats_key;
+
+typedef struct lwlock_stats
+{
+ lwlock_stats_key key;
+ int sh_acquire_count;
+ int ex_acquire_count;
+ int block_count;
+ int spin_delay_count;
+} lwlock_stats;
+
static int counts_for_pid = 0;
-static int *sh_acquire_counts;
-static int *ex_acquire_counts;
-static int *block_counts;
-static int *spin_delay_counts;
+static HTAB *lwlock_stats_htab;
#endif
#ifdef LOCK_DEBUG
bool Trace_lwlocks = false;
inline static void
-PRINT_LWDEBUG(const char *where, LWLockId lockid, const volatile LWLock *lock)
+PRINT_LWDEBUG(const char *where, const volatile LWLock *lock)
{
if (Trace_lwlocks)
- elog(LOG, "%s(%d): excl %d shared %d head %p rOK %d",
- where, (int) lockid,
+ elog(LOG, "%s(%s %d): excl %d shared %d head %p rOK %d",
+ where, T_NAME(lock), T_ID(lock),
(int) lock->exclusive, lock->shared, lock->head,
(int) lock->releaseOK);
}
inline static void
-LOG_LWDEBUG(const char *where, LWLockId lockid, const char *msg)
+LOG_LWDEBUG(const char *where, const char *name, int index, const char *msg)
{
if (Trace_lwlocks)
- elog(LOG, "%s(%d): %s", where, (int) lockid, msg);
+ elog(LOG, "%s(%s %d): %s", where, name, index, msg);
}
#else /* not LOCK_DEBUG */
-#define PRINT_LWDEBUG(a,b,c)
-#define LOG_LWDEBUG(a,b,c)
+#define PRINT_LWDEBUG(a,b)
+#define LOG_LWDEBUG(a,b,c,d)
#endif /* LOCK_DEBUG */
#ifdef LWLOCK_STATS
static void init_lwlock_stats(void);
static void print_lwlock_stats(int code, Datum arg);
+static lwlock_stats *get_lwlock_stats_entry(LWLock *lockid);
static void
init_lwlock_stats(void)
{
- int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
- int numLocks = LWLockCounter[1];
+ HASHCTL ctl;
- sh_acquire_counts = calloc(numLocks, sizeof(int));
- ex_acquire_counts = calloc(numLocks, sizeof(int));
- spin_delay_counts = calloc(numLocks, sizeof(int));
- block_counts = calloc(numLocks, sizeof(int));
+ if (lwlock_stats_htab != NULL)
+ {
+ hash_destroy(lwlock_stats_htab);
+ lwlock_stats_htab = NULL;
+ }
+
+ MemSet(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(lwlock_stats_key);
+ ctl.entrysize = sizeof(lwlock_stats);
+ ctl.hash = tag_hash;
+ lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
+ HASH_ELEM | HASH_FUNCTION);
counts_for_pid = MyProcPid;
on_shmem_exit(print_lwlock_stats, 0);
}
@@ -144,30 +151,58 @@ init_lwlock_stats(void)
static void
print_lwlock_stats(int code, Datum arg)
{
- int i;
- int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
- int numLocks = LWLockCounter[1];
+ HASH_SEQ_STATUS scan;
+ lwlock_stats *lwstats;
+
+ hash_seq_init(&scan, lwlock_stats_htab);
/* Grab an LWLock to keep different backends from mixing reports */
- LWLockAcquire(0, LW_EXCLUSIVE);
+ LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
- for (i = 0; i < numLocks; i++)
+ while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
{
- if (sh_acquire_counts[i] || ex_acquire_counts[i] || block_counts[i] || spin_delay_counts[i])
- fprintf(stderr, "PID %d lwlock %d: shacq %u exacq %u blk %u spindelay %u\n",
- MyProcPid, i, sh_acquire_counts[i], ex_acquire_counts[i],
- block_counts[i], spin_delay_counts[i]);
+ fprintf(stderr,
+ "PID %d lwlock %s %d: shacq %u exacq %u blk %u spindelay %u\n",
+ MyProcPid, LWLockTrancheArray[lwstats->key.tranche]->name,
+ lwstats->key.instance, lwstats->sh_acquire_count,
+ lwstats->ex_acquire_count, lwstats->block_count,
+ lwstats->spin_delay_count);
}
- LWLockRelease(0);
+ LWLockRelease(&MainLWLockArray[0].lock);
+}
+
+static lwlock_stats *
+get_lwlock_stats_entry(LWLock *lock)
+{
+ lwlock_stats_key key;
+ lwlock_stats *lwstats;
+ bool found;
+
+ /* Set up local count state first time through in a given process */
+ if (counts_for_pid != MyProcPid)
+ init_lwlock_stats();
+
+ /* Fetch or create the entry. */
+ key.tranche = lock->tranche;
+ key.instance = T_ID(lock);
+ lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
+ if (!found)
+ {
+ lwstats->sh_acquire_count = 0;
+ lwstats->ex_acquire_count = 0;
+ lwstats->block_count = 0;
+ lwstats->spin_delay_count = 0;
+ }
+ return lwstats;
}
#endif /* LWLOCK_STATS */
/*
- * Compute number of LWLocks to allocate.
+ * Compute number of LWLocks to allocate in the main array.
*/
-int
+static int
NumLWLocks(void)
{
int numLocks;
@@ -180,7 +215,7 @@ NumLWLocks(void)
*/
/* Predefined LWLocks */
- numLocks = (int) NumFixedLWLocks;
+ numLocks = NUM_FIXED_LWLOCKS;
/* bufmgr.c needs two for each shared buffer */
numLocks += 2 * NBuffers;
@@ -248,56 +283,67 @@ LWLockShmemSize(void)
size = mul_size(numLocks, sizeof(LWLockPadded));
/* Space for dynamic allocation counter, plus room for alignment. */
- size = add_size(size, 2 * sizeof(int) + LWLOCK_PADDED_SIZE);
+ size = add_size(size, 3 * sizeof(int) + LWLOCK_PADDED_SIZE);
return size;
}
/*
- * Allocate shmem space for LWLocks and initialize the locks.
+ * Allocate shmem space for the main LWLock array and initialize it. We also
+ * register the main tranche here.
*/
void
CreateLWLocks(void)
{
- int numLocks = NumLWLocks();
- Size spaceLocks = LWLockShmemSize();
- LWLockPadded *lock;
- int *LWLockCounter;
- char *ptr;
- int id;
-
- /* Allocate space */
- ptr = (char *) ShmemAlloc(spaceLocks);
-
- /* Leave room for dynamic allocation counter */
- ptr += 2 * sizeof(int);
-
- /* Ensure desired alignment of LWLock array */
- ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
-
- LWLockArray = (LWLockPadded *) ptr;
-
- /*
- * Initialize all LWLocks to "unlocked" state
- */
- for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++)
+ if (!IsUnderPostmaster)
{
- SpinLockInit(&lock->lock.mutex);
- lock->lock.releaseOK = true;
- lock->lock.exclusive = 0;
- lock->lock.shared = 0;
- lock->lock.head = NULL;
- lock->lock.tail = NULL;
+ int numLocks = NumLWLocks();
+ Size spaceLocks = LWLockShmemSize();
+ LWLockPadded *lock;
+ int *LWLockCounter;
+ char *ptr;
+ int id;
+
+ /* Allocate space */
+ ptr = (char *) ShmemAlloc(spaceLocks);
+
+ /* Leave room for dynamic allocation of locks and tranches */
+ ptr += 3 * sizeof(int);
+
+ /* Ensure desired alignment of LWLock array */
+ ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
+
+ MainLWLockArray = (LWLockPadded *) ptr;
+
+ /* Initialize all LWLocks in main array */
+ for (id = 0, lock = MainLWLockArray; id < numLocks; id++, lock++)
+ LWLockInitialize(&lock->lock, 0);
+
+ /*
+ * Initialize the dynamic-allocation counters, which are stored just
+ * before the first LWLock. LWLockCounter[0] is the allocation
+ * counter for lwlocks, LWLockCounter[1] is the maximum number that
+ * can be allocated from the main array, and LWLockCounter[2] is the
+ * allocation counter for tranches.
+ */
+ LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
+ LWLockCounter[0] = NUM_FIXED_LWLOCKS;
+ LWLockCounter[1] = numLocks;
+ LWLockCounter[2] = 1; /* 0 is the main array */
}
- /*
- * Initialize the dynamic-allocation counter, which is stored just before
- * the first LWLock.
- */
- LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
- LWLockCounter[0] = (int) NumFixedLWLocks;
- LWLockCounter[1] = numLocks;
+ if (LWLockTrancheArray == NULL)
+ {
+ LWLockTranchesAllocated = 16;
+ LWLockTrancheArray = MemoryContextAlloc(TopMemoryContext,
+ LWLockTranchesAllocated * sizeof(LWLockTranche *));
+ }
+
+ MainLWLockTranche.name = "main";
+ MainLWLockTranche.array_base = MainLWLockArray;
+ MainLWLockTranche.array_stride = sizeof(LWLockPadded);
+ LWLockRegisterTranche(0, &MainLWLockTranche);
}
@@ -309,26 +355,86 @@ CreateLWLocks(void)
* startup, but it is needed if any user-defined code tries to allocate
* LWLocks after startup.
*/
-LWLockId
+LWLock *
LWLockAssign(void)
{
- LWLockId result;
+ LWLock *result;
/* use volatile pointer to prevent code rearrangement */
volatile int *LWLockCounter;
- LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
+ LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
SpinLockAcquire(ShmemLock);
if (LWLockCounter[0] >= LWLockCounter[1])
{
SpinLockRelease(ShmemLock);
- elog(ERROR, "no more LWLockIds available");
+ elog(ERROR, "no more LWLocks available");
}
- result = (LWLockId) (LWLockCounter[0]++);
+ result = &MainLWLockArray[LWLockCounter[0]++].lock;
SpinLockRelease(ShmemLock);
return result;
}
+/*
+ * Allocate a new tranche ID.
+ */
+int
+LWLockNewTrancheId(void)
+{
+ int result;
+
+ /* use volatile pointer to prevent code rearrangement */
+ volatile int *LWLockCounter;
+
+ LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
+ SpinLockAcquire(ShmemLock);
+ result = LWLockCounter[2]++;
+ SpinLockRelease(ShmemLock);
+
+ return result;
+}
+
+/*
+ * Register a tranche ID in the lookup table for the current process. This
+ * routine will save a pointer to the tranche object passed as an argument,
+ * so that object should be allocated in a backend-lifetime context
+ * (TopMemoryContext, static variable, or similar).
+ */
+void
+LWLockRegisterTranche(int tranche_id, LWLockTranche *tranche)
+{
+ Assert(LWLockTrancheArray != NULL);
+
+ if (tranche_id >= LWLockTranchesAllocated)
+ {
+ int i = LWLockTranchesAllocated;
+
+ while (i <= tranche_id)
+ i *= 2;
+
+ LWLockTrancheArray = repalloc(LWLockTrancheArray,
+ i * sizeof(LWLockTranche *));
+ LWLockTranchesAllocated = i;
+ }
+
+ LWLockTrancheArray[tranche_id] = tranche;
+}
+
+/*
+ * LWLockInitialize - initialize a new lwlock; it's initially unlocked
+ */
+void
+LWLockInitialize(LWLock *lock, int tranche_id)
+{
+ SpinLockInit(&lock->mutex);
+ lock->releaseOK = true;
+ lock->exclusive = 0;
+ lock->shared = 0;
+ lock->tranche = tranche_id;
+ lock->head = NULL;
+ lock->tail = NULL;
+}
+
/*
* LWLockAcquire - acquire a lightweight lock in the specified mode
@@ -338,24 +444,26 @@ LWLockAssign(void)
* Side effect: cancel/die interrupts are held off until lock release.
*/
void
-LWLockAcquire(LWLockId lockid, LWLockMode mode)
+LWLockAcquire(LWLock *l, LWLockMode mode)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
PGPROC *proc = MyProc;
bool retry = false;
int extraWaits = 0;
+#ifdef LWLOCK_STATS
+ lwlock_stats *lwstats;
+#endif
- PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
+ PRINT_LWDEBUG("LWLockAcquire", lock);
#ifdef LWLOCK_STATS
- /* Set up local count state first time through in a given process */
- if (counts_for_pid != MyProcPid)
- init_lwlock_stats();
+ lwstats = get_lwlock_stats_entry(l);
+
/* Count lock acquisition attempts */
if (mode == LW_EXCLUSIVE)
- ex_acquire_counts[lockid]++;
+ lwstats->ex_acquire_count++;
else
- sh_acquire_counts[lockid]++;
+ lwstats->sh_acquire_count++;
#endif /* LWLOCK_STATS */
/*
@@ -398,7 +506,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
/* Acquire mutex. Time spent holding mutex should be short! */
#ifdef LWLOCK_STATS
- spin_delay_counts[lockid] += SpinLockAcquire(&lock->mutex);
+ lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
#else
SpinLockAcquire(&lock->mutex);
#endif
@@ -466,13 +574,13 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
* so that the lock manager or signal manager will see the received
* signal when it next waits.
*/
- LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");
+ LOG_LWDEBUG("LWLockAcquire", T_NAME(l), T_ID(l), "waiting");
#ifdef LWLOCK_STATS
- block_counts[lockid]++;
+ lwstats->block_count++;
#endif
- TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode);
for (;;)
{
@@ -483,9 +591,9 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
extraWaits++;
}
- TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode);
- LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");
+ LOG_LWDEBUG("LWLockAcquire", T_NAME(l), T_ID(l), "awakened");
/* Now loop back and try to acquire lock again. */
retry = true;
@@ -494,10 +602,10 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
/* We are done updating shared state of the lock itself. */
SpinLockRelease(&lock->mutex);
- TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(l), T_ID(l), mode);
/* Add lock to list of locks held by this backend */
- held_lwlocks[num_held_lwlocks++] = lockid;
+ held_lwlocks[num_held_lwlocks++] = l;
/*
* Fix the process wait semaphore's count for any absorbed wakeups.
@@ -514,12 +622,12 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
* If successful, cancel/die interrupts are held off until lock release.
*/
bool
-LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
+LWLockConditionalAcquire(LWLock *l, LWLockMode mode)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
bool mustwait;
- PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
+ PRINT_LWDEBUG("LWLockConditionalAcquire", lock);
/* Ensure we will have room to remember the lock */
if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
@@ -564,14 +672,14 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
{
/* Failed to get lock, so release interrupt holdoff */
RESUME_INTERRUPTS();
- LOG_LWDEBUG("LWLockConditionalAcquire", lockid, "failed");
- TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(lockid, mode);
+ LOG_LWDEBUG("LWLockConditionalAcquire", T_NAME(l), T_ID(l), "failed");
+ TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(l), T_ID(l), mode);
}
else
{
/* Add lock to list of locks held by this backend */
- held_lwlocks[num_held_lwlocks++] = lockid;
- TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(lockid, mode);
+ held_lwlocks[num_held_lwlocks++] = l;
+ TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(l), T_ID(l), mode);
}
return !mustwait;
@@ -592,19 +700,20 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
* wake up, observe that their records have already been flushed, and return.
*/
bool
-LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
+LWLockAcquireOrWait(LWLock *l, LWLockMode mode)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
PGPROC *proc = MyProc;
bool mustwait;
int extraWaits = 0;
+#ifdef LWLOCK_STATS
+ lwlock_stats *lwstats;
+#endif
- PRINT_LWDEBUG("LWLockAcquireOrWait", lockid, lock);
+ PRINT_LWDEBUG("LWLockAcquireOrWait", lock);
#ifdef LWLOCK_STATS
- /* Set up local count state first time through in a given process */
- if (counts_for_pid != MyProcPid)
- init_lwlock_stats();
+ lwstats = get_lwlock_stats_entry(l);
#endif
/* Ensure we will have room to remember the lock */
@@ -671,13 +780,13 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
* Wait until awakened. Like in LWLockAcquire, be prepared for bogus
* wakups, because we share the semaphore with ProcWaitForSignal.
*/
- LOG_LWDEBUG("LWLockAcquireOrWait", lockid, "waiting");
+ LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "waiting");
#ifdef LWLOCK_STATS
- block_counts[lockid]++;
+ lwstats->block_count++;
#endif
- TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode);
for (;;)
{
@@ -688,9 +797,9 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
extraWaits++;
}
- TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode);
- LOG_LWDEBUG("LWLockAcquireOrWait", lockid, "awakened");
+ LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "awakened");
}
else
{
@@ -708,14 +817,14 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
{
/* Failed to get lock, so release interrupt holdoff */
RESUME_INTERRUPTS();
- LOG_LWDEBUG("LWLockAcquireOrWait", lockid, "failed");
- TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE_FAIL(lockid, mode);
+ LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "failed");
+ TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE_FAIL(T_NAME(l), T_ID(l), mode);
}
else
{
/* Add lock to list of locks held by this backend */
- held_lwlocks[num_held_lwlocks++] = lockid;
- TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE(lockid, mode);
+ held_lwlocks[num_held_lwlocks++] = l;
+ TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE(T_NAME(l), T_ID(l), mode);
}
return !mustwait;
@@ -725,14 +834,14 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
* LWLockRelease - release a previously acquired lock
*/
void
-LWLockRelease(LWLockId lockid)
+LWLockRelease(LWLock *l)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
PGPROC *head;
PGPROC *proc;
int i;
- PRINT_LWDEBUG("LWLockRelease", lockid, lock);
+ PRINT_LWDEBUG("LWLockRelease", lock);
/*
* Remove lock from list of locks held. Usually, but not always, it will
@@ -740,11 +849,11 @@ LWLockRelease(LWLockId lockid)
*/
for (i = num_held_lwlocks; --i >= 0;)
{
- if (lockid == held_lwlocks[i])
+ if (l == held_lwlocks[i])
break;
}
if (i < 0)
- elog(ERROR, "lock %d is not held", (int) lockid);
+ elog(ERROR, "lock %s %d is not held", T_NAME(l), T_ID(l));
num_held_lwlocks--;
for (; i < num_held_lwlocks; i++)
held_lwlocks[i] = held_lwlocks[i + 1];
@@ -824,14 +933,14 @@ LWLockRelease(LWLockId lockid)
/* We are done updating shared state of the lock itself. */
SpinLockRelease(&lock->mutex);
- TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid);
+ TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(l), T_ID(l));
/*
* Awaken any waiters I removed from the queue.
*/
while (head != NULL)
{
- LOG_LWDEBUG("LWLockRelease", lockid, "release waiter");
+ LOG_LWDEBUG("LWLockRelease", T_NAME(l), T_ID(l), "release waiter");
proc = head;
head = proc->lwWaitLink;
proc->lwWaitLink = NULL;
@@ -874,13 +983,13 @@ LWLockReleaseAll(void)
* lock is held shared or exclusive.
*/
bool
-LWLockHeldByMe(LWLockId lockid)
+LWLockHeldByMe(LWLock *l)
{
int i;
for (i = 0; i < num_held_lwlocks; i++)
{
- if (held_lwlocks[i] == lockid)
+ if (held_lwlocks[i] == l)
return true;
}
return false;
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index e7f44cce84..6700072027 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -241,7 +241,10 @@
#define PredicateLockHashPartition(hashcode) \
((hashcode) % NUM_PREDICATELOCK_PARTITIONS)
#define PredicateLockHashPartitionLock(hashcode) \
- ((LWLockId) (FirstPredicateLockMgrLock + PredicateLockHashPartition(hashcode)))
+ (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + \
+ PredicateLockHashPartition(hashcode)].lock)
+#define PredicateLockHashPartitionLockByIndex(i) \
+ (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
#define NPREDICATELOCKTARGETENTS() \
mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
@@ -383,7 +386,7 @@ static SHM_QUEUE *FinishedSerializableTransactions;
*/
static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0};
static uint32 ScratchTargetTagHash;
-static int ScratchPartitionLock;
+static LWLock *ScratchPartitionLock;
/*
* The local hash table used to determine when to combine multiple fine-
@@ -1398,7 +1401,7 @@ GetPredicateLockStatusData(void)
* in ascending order, then SerializableXactHashLock.
*/
for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
- LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
LWLockAcquire(SerializableXactHashLock, LW_SHARED);
/* Get number of locks and allocate appropriately-sized arrays. */
@@ -1427,7 +1430,7 @@ GetPredicateLockStatusData(void)
/* Release locks in reverse order */
LWLockRelease(SerializableXactHashLock);
for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
- LWLockRelease(FirstPredicateLockMgrLock + i);
+ LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
return data;
}
@@ -1856,7 +1859,7 @@ PageIsPredicateLocked(Relation relation, BlockNumber blkno)
{
PREDICATELOCKTARGETTAG targettag;
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
PREDICATELOCKTARGET *target;
SET_PREDICATELOCKTARGETTAG_PAGE(targettag,
@@ -2089,7 +2092,7 @@ DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag)
if (TargetTagIsCoveredBy(oldtargettag, *newtargettag))
{
uint32 oldtargettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
PREDICATELOCK *rmpredlock PG_USED_FOR_ASSERTS_ONLY;
oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
@@ -2301,7 +2304,7 @@ CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
PREDICATELOCKTARGET *target;
PREDICATELOCKTAG locktag;
PREDICATELOCK *lock;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool found;
partitionLock = PredicateLockHashPartitionLock(targettaghash);
@@ -2599,10 +2602,10 @@ TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag,
bool removeOld)
{
uint32 oldtargettaghash;
- LWLockId oldpartitionLock;
+ LWLock *oldpartitionLock;
PREDICATELOCKTARGET *oldtarget;
uint32 newtargettaghash;
- LWLockId newpartitionLock;
+ LWLock *newpartitionLock;
bool found;
bool outOfShmem = false;
@@ -2858,7 +2861,7 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer)
/* Acquire locks on all lock partitions */
LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
- LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE);
+ LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
/*
@@ -2996,7 +2999,7 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer)
/* Release locks in reverse order */
LWLockRelease(SerializableXactHashLock);
for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
- LWLockRelease(FirstPredicateLockMgrLock + i);
+ LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
LWLockRelease(SerializablePredicateLockListLock);
}
@@ -3611,7 +3614,7 @@ ClearOldPredicateLocks(void)
PREDICATELOCKTARGET *target;
PREDICATELOCKTARGETTAG targettag;
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
tag = predlock->tag;
target = tag.myTarget;
@@ -3690,7 +3693,7 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
PREDICATELOCKTARGET *target;
PREDICATELOCKTARGETTAG targettag;
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
nextpredlock = (PREDICATELOCK *)
SHMQueueNext(&(sxact->predicateLocks),
@@ -4068,7 +4071,7 @@ static void
CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
{
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
PREDICATELOCKTARGET *target;
PREDICATELOCK *predlock;
PREDICATELOCK *mypredlock = NULL;
@@ -4360,7 +4363,7 @@ CheckTableForSerializableConflictIn(Relation relation)
LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
- LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
LWLockAcquire(SerializableXactHashLock, LW_SHARED);
/* Scan through target list */
@@ -4407,7 +4410,7 @@ CheckTableForSerializableConflictIn(Relation relation)
/* Release locks in reverse order */
LWLockRelease(SerializableXactHashLock);
for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
- LWLockRelease(FirstPredicateLockMgrLock + i);
+ LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
LWLockRelease(SerializablePredicateLockListLock);
}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index ee6c24cea7..1a683b8336 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -189,7 +189,8 @@ InitProcGlobal(void)
*/
procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC));
ProcGlobal->allProcs = procs;
- ProcGlobal->allProcCount = TotalProcs;
+ /* XXX allProcCount isn't really all of them; it excludes prepared xacts */
+ ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS;
if (!procs)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -663,7 +664,7 @@ IsWaitingForLock(void)
void
LockErrorCleanup(void)
{
- LWLockId partitionLock;
+ LWLock *partitionLock;
DisableTimeoutParams timeouts[2];
AbortStrongLockAcquire();
@@ -942,7 +943,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
LOCK *lock = locallock->lock;
PROCLOCK *proclock = locallock->proclock;
uint32 hashcode = locallock->hashcode;
- LWLockId partitionLock = LockHashPartitionLock(hashcode);
+ LWLock *partitionLock = LockHashPartitionLock(hashcode);
PROC_QUEUE *waitQueue = &(lock->waitProcs);
LOCKMASK myHeldLocks = MyProc->heldLocks;
bool early_deadlock = false;
@@ -1440,7 +1441,7 @@ CheckDeadLock(void)
* interrupts.
*/
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
- LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);
+ LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
/*
* Check to see if we've been awoken by anyone in the interim.
@@ -1522,7 +1523,7 @@ CheckDeadLock(void)
*/
check_done:
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstLockMgrLock + i);
+ LWLockRelease(LockHashPartitionLockByIndex(i));
}
diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d
index 17c8e15f35..804ba6ae62 100644
--- a/src/backend/utils/probes.d
+++ b/src/backend/utils/probes.d
@@ -15,7 +15,6 @@
* in probe definitions, as they cause compilation errors on Mac OS X 10.5.
*/
#define LocalTransactionId unsigned int
-#define LWLockId int
#define LWLockMode int
#define LOCKMODE int
#define BlockNumber unsigned int
@@ -29,14 +28,14 @@ provider postgresql {
probe transaction__commit(LocalTransactionId);
probe transaction__abort(LocalTransactionId);
- probe lwlock__acquire(LWLockId, LWLockMode);
- probe lwlock__release(LWLockId);
- probe lwlock__wait__start(LWLockId, LWLockMode);
- probe lwlock__wait__done(LWLockId, LWLockMode);
- probe lwlock__condacquire(LWLockId, LWLockMode);
- probe lwlock__condacquire__fail(LWLockId, LWLockMode);
- probe lwlock__wait__until__free(LWLockId, LWLockMode);
- probe lwlock__wait__until__free__fail(LWLockId, LWLockMode);
+ probe lwlock__acquire(const char *, int, LWLockMode);
+ probe lwlock__release(const char *, int);
+ probe lwlock__wait__start(const char *, int, LWLockMode);
+ probe lwlock__wait__done(const char *, int, LWLockMode);
+ probe lwlock__condacquire(const char *, int, LWLockMode);
+ probe lwlock__condacquire__fail(const char *, int, LWLockMode);
+ probe lwlock__wait__until__free(const char *, int, LWLockMode);
+ probe lwlock__wait__until__free__fail(const char *, int, LWLockMode);
probe lock__wait__start(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
probe lock__wait__done(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 4ec11b1ec2..c7b4186ffa 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -55,7 +55,7 @@ typedef enum
*/
typedef struct SlruSharedData
{
- LWLockId ControlLock;
+ LWLock *ControlLock;
/* Number of buffers managed by this SLRU structure */
int num_slots;
@@ -69,7 +69,7 @@ typedef struct SlruSharedData
bool *page_dirty;
int *page_number;
int *page_lru_count;
- LWLockId *buffer_locks;
+ LWLock **buffer_locks;
/*
* Optional array of WAL flush LSNs associated with entries in the SLRU
@@ -136,7 +136,7 @@ typedef SlruCtlData *SlruCtl;
extern Size SimpleLruShmemSize(int nslots, int nlsns);
extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
- LWLockId ctllock, const char *subdir);
+ LWLock *ctllock, const char *subdir);
extern int SimpleLruZeroPage(SlruCtl ctl, int pageno);
extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
TransactionId xid);
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 457390fc87..93a0030c3e 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -104,7 +104,10 @@ typedef struct buftag
#define BufTableHashPartition(hashcode) \
((hashcode) % NUM_BUFFER_PARTITIONS)
#define BufMappingPartitionLock(hashcode) \
- ((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))
+ (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
+ BufTableHashPartition(hashcode)].lock)
+#define BufMappingPartitionLockByIndex(i) \
+ (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
/*
* BufferDesc -- shared descriptor/state data for a single shared buffer.
@@ -144,8 +147,8 @@ typedef struct sbufdesc
int buf_id; /* buffer's index number (from 0) */
int freeNext; /* link in freelist chain */
- LWLockId io_in_progress_lock; /* to wait for I/O to complete */
- LWLockId content_lock; /* to lock access to buffer contents */
+ LWLock *io_in_progress_lock; /* to wait for I/O to complete */
+ LWLock *content_lock; /* to lock access to buffer contents */
} BufferDesc;
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index f6a2029e2a..ceeab9fc8a 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -483,8 +483,10 @@ typedef enum
#define LockHashPartition(hashcode) \
((hashcode) % NUM_LOCK_PARTITIONS)
#define LockHashPartitionLock(hashcode) \
- ((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode)))
-
+ (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + \
+ LockHashPartition(hashcode)].lock)
+#define LockHashPartitionLockByIndex(i) \
+ (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
/*
* function prototypes
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index efdb8b5faf..4507926274 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -14,10 +14,123 @@
#ifndef LWLOCK_H
#define LWLOCK_H
+#include "storage/s_lock.h"
+
+struct PGPROC;
+
+/*
+ * It's occasionally necessary to identify a particular LWLock "by name"; e.g.
+ * because we wish to report the lock to dtrace. We could store a name or
+ * other identifying information in the lock itself, but since it's common
+ * to have many nearly-identical locks (e.g. one per buffer) this would end
+ * up wasting significant amounts of memory. Instead, each lwlock stores a
+ * tranche ID which tells us which array it's part of. Based on that, we can
+ * figure out where the lwlock lies within the array using the data structure
+ * shown below; the lock is then identified based on the tranche name and
+ * computed array index. We need the array stride because the array might not
+ * be an array of lwlocks, but rather some larger data structure that includes
+ * one or more lwlocks per element.
+ */
+typedef struct LWLockTranche
+{
+ const char *name;
+ void *array_base;
+ Size array_stride;
+} LWLockTranche;
+
+/*
+ * Code outside of lwlock.c should not manipulate the contents of this
+ * structure directly, but we have to declare it here to allow LWLocks to be
+ * incorporated into other data structures.
+ */
+typedef struct LWLock
+{
+ slock_t mutex; /* Protects LWLock and queue of PGPROCs */
+ bool releaseOK; /* T if ok to release waiters */
+ char exclusive; /* # of exclusive holders (0 or 1) */
+ int shared; /* # of shared holders (0..MaxBackends) */
+ int tranche; /* tranche ID */
+ struct PGPROC *head; /* head of list of waiting PGPROCs */
+ struct PGPROC *tail; /* tail of list of waiting PGPROCs */
+ /* tail is undefined when head is NULL */
+} LWLock;
+
+/*
+ * Prior to PostgreSQL 9.4, every lightweight lock in the system was stored
+ * in a single array. For convenience and for compatibility with past
+ * releases, we still have a main array, but it's now also permissible to
+ * store LWLocks elsewhere in the main shared memory segment or in a dynamic
+ * shared memory segment. In the main array, we force the array stride to
+ * be a power of 2, which saves a few cycles in indexing, but more importantly
+ * also ensures that individual LWLocks don't cross cache line boundaries.
+ * This reduces cache contention problems, especially on AMD Opterons.
+ * (Of course, we have to also ensure that the array start address is suitably
+ * aligned.)
+ *
+ * Even on a 32-bit platform, an lwlock will be more than 16 bytes, because
+ * it contains 2 integers and 2 pointers, plus other stuff. It should fit
+ * into 32 bytes, though, unless slock_t is really big. On a 64-bit platform,
+ * it should fit into 32 bytes unless slock_t is larger than 4 bytes. We
+ * allow for that just in case.
+ */
+#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 32 ? 32 : 64)
+
+typedef union LWLockPadded
+{
+ LWLock lock;
+ char pad[LWLOCK_PADDED_SIZE];
+} LWLockPadded;
+extern LWLockPadded *MainLWLockArray;
+
+/*
+ * Some commonly-used locks have predefined positions within MainLWLockArray;
+ * defining macros here makes it much easier to keep track of these. If you
+ * add a lock, add it to the end to avoid renumbering the existing locks;
+ * if you remove a lock, consider leaving a gap in the numbering sequence for
+ * the benefit of DTrace and other external debugging scripts.
+ */
+#define BufFreelistLock (&MainLWLockArray[0].lock)
+#define ShmemIndexLock (&MainLWLockArray[1].lock)
+#define OidGenLock (&MainLWLockArray[2].lock)
+#define XidGenLock (&MainLWLockArray[3].lock)
+#define ProcArrayLock (&MainLWLockArray[4].lock)
+#define SInvalReadLock (&MainLWLockArray[5].lock)
+#define SInvalWriteLock (&MainLWLockArray[6].lock)
+#define WALBufMappingLock (&MainLWLockArray[7].lock)
+#define WALWriteLock (&MainLWLockArray[8].lock)
+#define ControlFileLock (&MainLWLockArray[9].lock)
+#define CheckpointLock (&MainLWLockArray[10].lock)
+#define CLogControlLock (&MainLWLockArray[11].lock)
+#define SubtransControlLock (&MainLWLockArray[12].lock)
+#define MultiXactGenLock (&MainLWLockArray[13].lock)
+#define MultiXactOffsetControlLock (&MainLWLockArray[14].lock)
+#define MultiXactMemberControlLock (&MainLWLockArray[15].lock)
+#define RelCacheInitLock (&MainLWLockArray[16].lock)
+#define CheckpointerCommLock (&MainLWLockArray[17].lock)
+#define TwoPhaseStateLock (&MainLWLockArray[18].lock)
+#define TablespaceCreateLock (&MainLWLockArray[19].lock)
+#define BtreeVacuumLock (&MainLWLockArray[20].lock)
+#define AddinShmemInitLock (&MainLWLockArray[21].lock)
+#define AutovacuumLock (&MainLWLockArray[22].lock)
+#define AutovacuumScheduleLock (&MainLWLockArray[23].lock)
+#define SyncScanLock (&MainLWLockArray[24].lock)
+#define RelationMappingLock (&MainLWLockArray[25].lock)
+#define AsyncCtlLock (&MainLWLockArray[26].lock)
+#define AsyncQueueLock (&MainLWLockArray[27].lock)
+#define SerializableXactHashLock (&MainLWLockArray[28].lock)
+#define SerializableFinishedListLock (&MainLWLockArray[29].lock)
+#define SerializablePredicateLockListLock (&MainLWLockArray[30].lock)
+#define OldSerXidLock (&MainLWLockArray[31].lock)
+#define SyncRepLock (&MainLWLockArray[32].lock)
+#define BackgroundWorkerLock (&MainLWLockArray[33].lock)
+#define DynamicSharedMemoryControlLock (&MainLWLockArray[34].lock)
+#define AutoFileLock (&MainLWLockArray[35].lock)
+#define NUM_INDIVIDUAL_LWLOCKS 36
+
/*
* It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS
- * here, but we need them to set up enum LWLockId correctly, and having
- * this file include lock.h or bufmgr.h would be backwards.
+ * here, but we need them to figure out offsets within MainLWLockArray, and
+ * having this file include lock.h or bufmgr.h would be backwards.
*/
/* Number of partitions of the shared buffer mapping hashtable */
@@ -31,68 +144,14 @@
#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
-/*
- * We have a number of predefined LWLocks, plus a bunch of LWLocks that are
- * dynamically assigned (e.g., for shared buffers). The LWLock structures
- * live in shared memory (since they contain shared data) and are identified
- * by values of this enumerated type. We abuse the notion of an enum somewhat
- * by allowing values not listed in the enum declaration to be assigned.
- * The extra value MaxDynamicLWLock is there to keep the compiler from
- * deciding that the enum can be represented as char or short ...
- *
- * If you remove a lock, please replace it with a placeholder. This retains
- * the lock numbering, which is helpful for DTrace and other external
- * debugging scripts.
- */
-typedef enum LWLockId
-{
- BufFreelistLock,
- ShmemIndexLock,
- OidGenLock,
- XidGenLock,
- ProcArrayLock,
- SInvalReadLock,
- SInvalWriteLock,
- WALBufMappingLock,
- WALWriteLock,
- ControlFileLock,
- CheckpointLock,
- CLogControlLock,
- SubtransControlLock,
- MultiXactGenLock,
- MultiXactOffsetControlLock,
- MultiXactMemberControlLock,
- RelCacheInitLock,
- CheckpointerCommLock,
- TwoPhaseStateLock,
- TablespaceCreateLock,
- BtreeVacuumLock,
- AddinShmemInitLock,
- AutovacuumLock,
- AutovacuumScheduleLock,
- SyncScanLock,
- RelationMappingLock,
- AsyncCtlLock,
- AsyncQueueLock,
- SerializableXactHashLock,
- SerializableFinishedListLock,
- SerializablePredicateLockListLock,
- OldSerXidLock,
- SyncRepLock,
- BackgroundWorkerLock,
- DynamicSharedMemoryControlLock,
- AutoFileLock,
- /* Individual lock IDs end here */
- FirstBufMappingLock,
- FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
- FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,
-
- /* must be last except for MaxDynamicLWLock: */
- NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,
-
- MaxDynamicLWLock = 1000000000
-} LWLockId;
-
+/* Offsets for various chunks of preallocated lwlocks. */
+#define BUFFER_MAPPING_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS
+#define LOCK_MANAGER_LWLOCK_OFFSET \
+ (BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS)
+#define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \
+ (LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS)
+#define NUM_FIXED_LWLOCKS \
+ (PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS)
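+/*
+ * MainLWLockArray therefore holds the individual locks first, followed by
+ * the buffer-mapping, lock-manager, and predicate-lock-manager partition
+ * locks; slots at NUM_FIXED_LWLOCKS and beyond are handed out dynamically
+ * by LWLockAssign().
+ */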
typedef enum LWLockMode
{
@@ -108,18 +167,47 @@ typedef enum LWLockMode
extern bool Trace_lwlocks;
#endif
-extern LWLockId LWLockAssign(void);
-extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
-extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
-extern bool LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode);
-extern void LWLockRelease(LWLockId lockid);
+extern void LWLockAcquire(LWLock *lock, LWLockMode mode);
+extern bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode);
+extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode);
+extern void LWLockRelease(LWLock *lock);
extern void LWLockReleaseAll(void);
-extern bool LWLockHeldByMe(LWLockId lockid);
+extern bool LWLockHeldByMe(LWLock *lock);
-extern int NumLWLocks(void);
extern Size LWLockShmemSize(void);
extern void CreateLWLocks(void);
+/*
+ * The traditional method for obtaining an lwlock for use by an extension is
+ * to call RequestAddinLWLocks() during postmaster startup; this will reserve
+ * space for the indicated number of locks in MainLWLockArray. Subsequently,
+ * a lock can be allocated using LWLockAssign.
+ */
extern void RequestAddinLWLocks(int n);
+extern LWLock *LWLockAssign(void);
+
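For reference, a minimal sketch of this traditional pattern (not part of the patch; the extension name, state struct, and hook wiring are hypothetical, and module boilerplate such as PG_MODULE_MAGIC is omitted), essentially the approach used by contrib modules like pg_stat_statements:

/* my_extension.c -- hypothetical add-in using the main LWLock array */
#include "postgres.h"

#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"

typedef struct MySharedState
{
	LWLock	   *lock;			/* protects counter */
	int64		counter;
} MySharedState;

static MySharedState *my_state = NULL;
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;

static void my_shmem_startup(void);

void
_PG_init(void)
{
	if (!process_shared_preload_libraries_in_progress)
		return;

	/* Reserve shared memory and one slot in the main LWLock array. */
	RequestAddinShmemSpace(MAXALIGN(sizeof(MySharedState)));
	RequestAddinLWLocks(1);

	prev_shmem_startup_hook = shmem_startup_hook;
	shmem_startup_hook = my_shmem_startup;
}

static void
my_shmem_startup(void)
{
	bool		found;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
	my_state = ShmemInitStruct("my_extension", sizeof(MySharedState), &found);
	if (!found)
	{
		my_state->lock = LWLockAssign();	/* an LWLock *, after this patch */
		my_state->counter = 0;
	}
	LWLockRelease(AddinShmemInitLock);
}
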
+/*
+ * There is another, more flexible method of obtaining lwlocks. First, call
+ * LWLockNewTrancheId just once to obtain a tranche ID; this allocates from
+ * a shared counter. Next, each individual process using the tranche should
+ * call LWLockRegisterTranche() to associate that tranche ID with appropriate
+ * metadata. Finally, LWLockInitialize should be called just once per lwlock,
+ * passing the tranche ID as an argument.
+ *
+ * It may seem strange that each process using the tranche must register it
+ * separately, but dynamic shared memory segments aren't guaranteed to be
+ * mapped at the same address in all coordinating backends, so storing the
+ * registration in the main shared memory segment wouldn't work for that case.
+ */
+extern int LWLockNewTrancheId(void);
+extern void LWLockRegisterTranche(int, LWLockTranche *);
+extern void LWLockInitialize(LWLock *, int tranche_id);
+
+/*
+ * Prior to PostgreSQL 9.4, we used an enum type called LWLockId to refer
+ * to LWLocks. New code should instead use LWLock *. However, for the
+ * convenience of third-party code, we include the following typedef.
+ */
+typedef LWLock *LWLockId;
#endif /* LWLOCK_H */
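
A comparable sketch of the new tranche interface, for LWLocks embedded in some other shared structure (the structure, names, and lock count below are hypothetical; allocating and exchanging the shared structure itself is not shown):

#include "postgres.h"

#include "storage/lwlock.h"

#define MY_NUM_LOCKS 8

typedef struct MySharedData
{
	int			lwlock_tranche_id;
	LWLock		locks[MY_NUM_LOCKS];
	/* ... other shared state protected by the locks ... */
} MySharedData;

/* Tranche metadata must survive for the life of the backend. */
static LWLockTranche my_tranche;

/* Run once, in whichever process creates the shared structure. */
static void
my_initialize(MySharedData *shared)
{
	int			i;

	shared->lwlock_tranche_id = LWLockNewTrancheId();
	for (i = 0; i < MY_NUM_LOCKS; i++)
		LWLockInitialize(&shared->locks[i], shared->lwlock_tranche_id);
}

/*
 * Run in every process that touches the locks (including the creator):
 * the tranche registration is backend-local, so each attaching process
 * must redo it with its own mapping of the shared structure.
 */
static void
my_attach(MySharedData *shared)
{
	my_tranche.name = "my_extension";
	my_tranche.array_base = shared->locks;
	my_tranche.array_stride = sizeof(LWLock);
	LWLockRegisterTranche(shared->lwlock_tranche_id, &my_tranche);
}

Once registered, these locks work with LWLockAcquire/LWLockRelease like any other LWLock, and they are reported under the tranche name by the DTrace probes and the LWLOCK_STATS output.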
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index acdc6788bc..a3cadd9a01 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -131,7 +131,7 @@ struct PGPROC
struct XidCache subxids; /* cache for subtransaction XIDs */
/* Per-backend LWLock. Protects fields below. */
- LWLockId backendLock; /* protects the fields below */
+ LWLock *backendLock; /* protects the fields below */
/* Lock manager data, recording fast-path locks taken by this backend. */
uint64 fpLockBits; /* lock modes held for each fast-path slot */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1f735b70b7..ad40735333 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -896,7 +896,6 @@ LPWSTR
LSEG
LVRelStats
LWLock
-LWLockId
LWLockMode
LWLockPadded
LabelProvider