diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index dbf8030f7c..1e2d192f11 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -116,7 +116,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
* possible deadlocks.
*/
for (i = 0; i < NUM_BUFFER_PARTITIONS; i++)
- LWLockAcquire(FirstBufMappingLock + i, LW_SHARED);
+ LWLockAcquire(BufMappingPartitionLockByIndex(i), LW_SHARED);
/*
* Scan though all the buffers, saving the relevant fields in the
@@ -157,7 +157,7 @@ pg_buffercache_pages(PG_FUNCTION_ARGS)
* avoids O(N^2) behavior inside LWLockRelease.
*/
for (i = NUM_BUFFER_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstBufMappingLock + i);
+ LWLockRelease(BufMappingPartitionLockByIndex(i));
}
funcctx = SRF_PERCALL_SETUP();
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 2f069b768e..858cce3457 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -150,7 +150,7 @@ typedef struct pgssEntry
*/
typedef struct pgssSharedState
{
- LWLockId lock; /* protects hashtable search/modification */
+ LWLock *lock; /* protects hashtable search/modification */
int query_size; /* max query length in bytes */
double cur_median_usage; /* current median usage in hashtable */
} pgssSharedState;
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981ab8..82eaf89a6b 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -2212,49 +2212,55 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
lwlock-acquire
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock has been acquired.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-release
- (LWLockId)
+ (char *, int)
Probe that fires when an LWLock has been released (but note
that any released waiters have not yet been awakened).
- arg0 is the LWLock's ID.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
lwlock-wait-start
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock was not immediately available and
a server process has begun to wait for the lock to become available.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-wait-done
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when a server process has been released from its
wait for an LWLock (it does not actually have the lock yet).
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-condacquire
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock was successfully acquired when the
caller specified no waiting.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lwlock-condacquire-fail
- (LWLockId, LWLockMode)
+ (char *, int, LWLockMode)
Probe that fires when an LWLock was not successfully acquired when
the caller specified no waiting.
- arg0 is the LWLock's ID.
- arg1 is the requested lock mode, either exclusive or shared.
+ arg0 is the LWLock's tranche.
+ arg1 is the LWLock's offset within its tranche.
+ arg2 is the requested lock mode, either exclusive or shared.
lock-wait-start
@@ -2299,10 +2305,6 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
LocalTransactionId
unsigned int
-
- LWLockId
- int
-
LWLockMode
int
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index f604aa9b60..b90db9a417 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -151,7 +151,7 @@ SimpleLruShmemSize(int nslots, int nlsns)
sz += MAXALIGN(nslots * sizeof(bool)); /* page_dirty[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_number[] */
sz += MAXALIGN(nslots * sizeof(int)); /* page_lru_count[] */
- sz += MAXALIGN(nslots * sizeof(LWLockId)); /* buffer_locks[] */
+ sz += MAXALIGN(nslots * sizeof(LWLock *)); /* buffer_locks[] */
if (nlsns > 0)
sz += MAXALIGN(nslots * nlsns * sizeof(XLogRecPtr)); /* group_lsn[] */
@@ -161,7 +161,7 @@ SimpleLruShmemSize(int nslots, int nlsns)
void
SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
- LWLockId ctllock, const char *subdir)
+ LWLock *ctllock, const char *subdir)
{
SlruShared shared;
bool found;
@@ -202,8 +202,8 @@ SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
offset += MAXALIGN(nslots * sizeof(int));
shared->page_lru_count = (int *) (ptr + offset);
offset += MAXALIGN(nslots * sizeof(int));
- shared->buffer_locks = (LWLockId *) (ptr + offset);
- offset += MAXALIGN(nslots * sizeof(LWLockId));
+ shared->buffer_locks = (LWLock **) (ptr + offset);
+ offset += MAXALIGN(nslots * sizeof(LWLock *));
if (nlsns > 0)
{
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index b807b064be..52f87895a6 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -448,8 +448,6 @@ typedef struct
typedef int InheritableSocket;
#endif
-typedef struct LWLock LWLock; /* ugly kluge */
-
/*
* Structure contains all variables passed to exec:ed backends
*/
@@ -473,7 +471,7 @@ typedef struct
#ifndef HAVE_SPINLOCKS
PGSemaphore SpinlockSemaArray;
#endif
- LWLock *LWLockArray;
+ LWLock *MainLWLockArray;
slock_t *ProcStructLock;
PROC_HDR *ProcGlobal;
PGPROC *AuxiliaryProcs;
@@ -5576,7 +5574,6 @@ PostmasterMarkPIDForWorkerNotify(int pid)
* functions. They are marked NON_EXEC_STATIC in their home modules.
*/
extern slock_t *ShmemLock;
-extern LWLock *LWLockArray;
extern slock_t *ProcStructLock;
extern PGPROC *AuxiliaryProcs;
extern PMSignalData *PMSignalState;
@@ -5625,7 +5622,7 @@ save_backend_variables(BackendParameters *param, Port *port,
#ifndef HAVE_SPINLOCKS
param->SpinlockSemaArray = SpinlockSemaArray;
#endif
- param->LWLockArray = LWLockArray;
+ param->MainLWLockArray = MainLWLockArray;
param->ProcStructLock = ProcStructLock;
param->ProcGlobal = ProcGlobal;
param->AuxiliaryProcs = AuxiliaryProcs;
@@ -5856,7 +5853,7 @@ restore_backend_variables(BackendParameters *param, Port *port)
#ifndef HAVE_SPINLOCKS
SpinlockSemaArray = param->SpinlockSemaArray;
#endif
- LWLockArray = param->LWLockArray;
+ MainLWLockArray = param->MainLWLockArray;
ProcStructLock = param->ProcStructLock;
ProcGlobal = param->ProcGlobal;
AuxiliaryProcs = param->AuxiliaryProcs;
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 91f0c7eb36..19eecab4c2 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -146,7 +146,7 @@ PrefetchBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
{
BufferTag newTag; /* identity of requested block */
uint32 newHash; /* hash value for newTag */
- LWLockId newPartitionLock; /* buffer partition lock for it */
+ LWLock *newPartitionLock; /* buffer partition lock for it */
int buf_id;
/* create a tag so we can lookup the buffer */
@@ -539,10 +539,10 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
{
BufferTag newTag; /* identity of requested block */
uint32 newHash; /* hash value for newTag */
- LWLockId newPartitionLock; /* buffer partition lock for it */
+ LWLock *newPartitionLock; /* buffer partition lock for it */
BufferTag oldTag; /* previous identity of selected buffer */
uint32 oldHash; /* hash value for oldTag */
- LWLockId oldPartitionLock; /* buffer partition lock for it */
+ LWLock *oldPartitionLock; /* buffer partition lock for it */
BufFlags oldFlags;
int buf_id;
volatile BufferDesc *buf;
@@ -891,7 +891,7 @@ InvalidateBuffer(volatile BufferDesc *buf)
{
BufferTag oldTag;
uint32 oldHash; /* hash value for oldTag */
- LWLockId oldPartitionLock; /* buffer partition lock for it */
+ LWLock *oldPartitionLock; /* buffer partition lock for it */
BufFlags oldFlags;
/* Save the original buffer tag before dropping the spinlock */
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index cc21923709..2e717457b1 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -182,8 +182,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
* Now initialize LWLocks, which do shared memory allocation and are
* needed for InitShmemIndex.
*/
- if (!IsUnderPostmaster)
- CreateLWLocks();
+ CreateLWLocks();
/*
* Set up shmem.c index hashtable
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index 5c8b4b0656..6335129ac2 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -565,7 +565,7 @@ LockHasWaiters(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
LOCALLOCK *locallock;
LOCK *lock;
PROCLOCK *proclock;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool hasWaiters = false;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -702,7 +702,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
bool found;
ResourceOwner owner;
uint32 hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
int status;
bool log_lock = false;
@@ -1744,7 +1744,7 @@ LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock)
LOCALLOCK *locallock;
LOCK *lock;
PROCLOCK *proclock;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool wakeupNeeded;
if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
@@ -2096,10 +2096,12 @@ LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks)
*/
for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
{
- LWLockId partitionLock = FirstLockMgrLock + partition;
+ LWLock *partitionLock;
SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
PROCLOCK *nextplock;
+ partitionLock = LockHashPartitionLockByIndex(partition);
+
/*
* If the proclock list for this partition is empty, we can skip
* acquiring the partition lock. This optimization is trickier than
@@ -2475,7 +2477,7 @@ static bool
FastPathTransferRelationLocks(LockMethod lockMethodTable, const LOCKTAG *locktag,
uint32 hashcode)
{
- LWLockId partitionLock = LockHashPartitionLock(hashcode);
+ LWLock *partitionLock = LockHashPartitionLock(hashcode);
Oid relid = locktag->locktag_field2;
uint32 i;
@@ -2565,7 +2567,7 @@ FastPathGetRelationLockEntry(LOCALLOCK *locallock)
LockMethod lockMethodTable = LockMethods[DEFAULT_LOCKMETHOD];
LOCKTAG *locktag = &locallock->tag.lock;
PROCLOCK *proclock = NULL;
- LWLockId partitionLock = LockHashPartitionLock(locallock->hashcode);
+ LWLock *partitionLock = LockHashPartitionLock(locallock->hashcode);
Oid relid = locktag->locktag_field2;
uint32 f;
@@ -2671,7 +2673,7 @@ GetLockConflicts(const LOCKTAG *locktag, LOCKMODE lockmode)
SHM_QUEUE *procLocks;
PROCLOCK *proclock;
uint32 hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
int count = 0;
int fast_count = 0;
@@ -2883,7 +2885,7 @@ LockRefindAndRelease(LockMethod lockMethodTable, PGPROC *proc,
PROCLOCKTAG proclocktag;
uint32 hashcode;
uint32 proclock_hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool wakeupNeeded;
hashcode = LockTagHashCode(locktag);
@@ -3159,10 +3161,12 @@ PostPrepare_Locks(TransactionId xid)
*/
for (partition = 0; partition < NUM_LOCK_PARTITIONS; partition++)
{
- LWLockId partitionLock = FirstLockMgrLock + partition;
+ LWLock *partitionLock;
SHM_QUEUE *procLocks = &(MyProc->myProcLocks[partition]);
PROCLOCK *nextplock;
+ partitionLock = LockHashPartitionLockByIndex(partition);
+
/*
* If the proclock list for this partition is empty, we can skip
* acquiring the partition lock. This optimization is safer than the
@@ -3400,7 +3404,7 @@ GetLockStatusData(void)
* Must grab LWLocks in partition-number order to avoid LWLock deadlock.
*/
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
- LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(LockHashPartitionLockByIndex(i), LW_SHARED);
/* Now we can safely count the number of proclocks */
data->nelements = el + hash_get_num_entries(LockMethodProcLockHash);
@@ -3442,7 +3446,7 @@ GetLockStatusData(void)
* behavior inside LWLockRelease.
*/
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstLockMgrLock + i);
+ LWLockRelease(LockHashPartitionLockByIndex(i));
Assert(el == data->nelements);
@@ -3477,7 +3481,7 @@ GetRunningTransactionLocks(int *nlocks)
* Must grab LWLocks in partition-number order to avoid LWLock deadlock.
*/
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
- LWLockAcquire(FirstLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(LockHashPartitionLockByIndex(i), LW_SHARED);
/* Now we can safely count the number of proclocks */
els = hash_get_num_entries(LockMethodProcLockHash);
@@ -3537,7 +3541,7 @@ GetRunningTransactionLocks(int *nlocks)
* behavior inside LWLockRelease.
*/
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstLockMgrLock + i);
+ LWLockRelease(LockHashPartitionLockByIndex(i));
*nlocks = index;
return accessExclusiveLocks;
@@ -3673,7 +3677,7 @@ lock_twophase_recover(TransactionId xid, uint16 info,
uint32 hashcode;
uint32 proclock_hashcode;
int partition;
- LWLockId partitionLock;
+ LWLock *partitionLock;
LockMethod lockMethodTable;
Assert(len == sizeof(TwoPhaseLockRecord));
@@ -4044,7 +4048,7 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
{
PROCLOCK *proclock;
uint32 hashcode;
- LWLockId partitionLock;
+ LWLock *partitionLock;
hashcode = LockTagHashCode(&tag);
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index 0e319a7e6a..55d9d7837c 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -31,50 +31,37 @@
#include "storage/predicate.h"
#include "storage/proc.h"
#include "storage/spin.h"
+#include "utils/memutils.h"
+
+#ifdef LWLOCK_STATS
+#include "utils/hsearch.h"
+#endif
/* We use the ShmemLock spinlock to protect LWLockAssign */
extern slock_t *ShmemLock;
-
-typedef struct LWLock
-{
- slock_t mutex; /* Protects LWLock and queue of PGPROCs */
- bool releaseOK; /* T if ok to release waiters */
- char exclusive; /* # of exclusive holders (0 or 1) */
- int shared; /* # of shared holders (0..MaxBackends) */
- PGPROC *head; /* head of list of waiting PGPROCs */
- PGPROC *tail; /* tail of list of waiting PGPROCs */
- /* tail is undefined when head is NULL */
-} LWLock;
-
/*
- * All the LWLock structs are allocated as an array in shared memory.
- * (LWLockIds are indexes into the array.) We force the array stride to
- * be a power of 2, which saves a few cycles in indexing, but more
- * importantly also ensures that individual LWLocks don't cross cache line
- * boundaries. This reduces cache contention problems, especially on AMD
- * Opterons. (Of course, we have to also ensure that the array start
- * address is suitably aligned.)
- *
- * LWLock is between 16 and 32 bytes on all known platforms, so these two
- * cases are sufficient.
+ * This is indexed by tranche ID and stores metadata for all tranches known
+ * to the current backend.
*/
-#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 16 ? 16 : 32)
+static LWLockTranche **LWLockTrancheArray = NULL;
+static int LWLockTranchesAllocated = 0;
-typedef union LWLockPadded
-{
- LWLock lock;
- char pad[LWLOCK_PADDED_SIZE];
-} LWLockPadded;
+#define T_NAME(lock) \
+ (LWLockTrancheArray[(lock)->tranche]->name)
+#define T_ID(lock) \
+ ((int) ((((char *) lock) - \
+ ((char *) LWLockTrancheArray[(lock)->tranche]->array_base)) / \
+ LWLockTrancheArray[(lock)->tranche]->array_stride))
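+/*
+ * For a lock in the main array, T_ID() reduces to the lock's index in that
+ * array: the main tranche's array_base is MainLWLockArray and its
+ * array_stride is sizeof(LWLockPadded), so e.g. T_ID(ProcArrayLock) is 4.
+ */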
/*
- * This points to the array of LWLocks in shared memory. Backends inherit
+ * This points to the main array of LWLocks in shared memory. Backends inherit
* the pointer by fork from the postmaster (except in the EXEC_BACKEND case,
* where we have special measures to pass it down).
*/
-NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL;
-
+LWLockPadded *MainLWLockArray = NULL;
+static LWLockTranche MainLWLockTranche;
/*
* We use this structure to keep track of locked LWLocks for release
@@ -85,58 +72,78 @@ NON_EXEC_STATIC LWLockPadded *LWLockArray = NULL;
#define MAX_SIMUL_LWLOCKS 100
static int num_held_lwlocks = 0;
-static LWLockId held_lwlocks[MAX_SIMUL_LWLOCKS];
+static LWLock *held_lwlocks[MAX_SIMUL_LWLOCKS];
static int lock_addin_request = 0;
static bool lock_addin_request_allowed = true;
#ifdef LWLOCK_STATS
+typedef struct lwlock_stats_key
+{
+ int tranche;
+ int instance;
+} lwlock_stats_key;
+
+typedef struct lwlock_stats
+{
+ lwlock_stats_key key;
+ int sh_acquire_count;
+ int ex_acquire_count;
+ int block_count;
+ int spin_delay_count;
+} lwlock_stats;
+
static int counts_for_pid = 0;
-static int *sh_acquire_counts;
-static int *ex_acquire_counts;
-static int *block_counts;
-static int *spin_delay_counts;
+static HTAB *lwlock_stats_htab;
#endif
#ifdef LOCK_DEBUG
bool Trace_lwlocks = false;
inline static void
-PRINT_LWDEBUG(const char *where, LWLockId lockid, const volatile LWLock *lock)
+PRINT_LWDEBUG(const char *where, const volatile LWLock *lock)
{
if (Trace_lwlocks)
- elog(LOG, "%s(%d): excl %d shared %d head %p rOK %d",
- where, (int) lockid,
+ elog(LOG, "%s(%s %d): excl %d shared %d head %p rOK %d",
+ where, T_NAME(lock), T_ID(lock),
(int) lock->exclusive, lock->shared, lock->head,
(int) lock->releaseOK);
}
inline static void
-LOG_LWDEBUG(const char *where, LWLockId lockid, const char *msg)
+LOG_LWDEBUG(const char *where, const char *name, int index, const char *msg)
{
if (Trace_lwlocks)
- elog(LOG, "%s(%d): %s", where, (int) lockid, msg);
+ elog(LOG, "%s(%s %d): %s", where, name, index, msg);
}
#else /* not LOCK_DEBUG */
-#define PRINT_LWDEBUG(a,b,c)
-#define LOG_LWDEBUG(a,b,c)
+#define PRINT_LWDEBUG(a,b)
+#define LOG_LWDEBUG(a,b,c,d)
#endif /* LOCK_DEBUG */
#ifdef LWLOCK_STATS
static void init_lwlock_stats(void);
static void print_lwlock_stats(int code, Datum arg);
+static lwlock_stats *get_lwlock_stats_entry(LWLock *lockid);
static void
init_lwlock_stats(void)
{
- int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
- int numLocks = LWLockCounter[1];
+ HASHCTL ctl;
- sh_acquire_counts = calloc(numLocks, sizeof(int));
- ex_acquire_counts = calloc(numLocks, sizeof(int));
- spin_delay_counts = calloc(numLocks, sizeof(int));
- block_counts = calloc(numLocks, sizeof(int));
+ if (lwlock_stats_htab != NULL)
+ {
+ hash_destroy(lwlock_stats_htab);
+ lwlock_stats_htab = NULL;
+ }
+
+ MemSet(&ctl, 0, sizeof(ctl));
+ ctl.keysize = sizeof(lwlock_stats_key);
+ ctl.entrysize = sizeof(lwlock_stats);
+ ctl.hash = tag_hash;
+ lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
+ HASH_ELEM | HASH_FUNCTION);
counts_for_pid = MyProcPid;
on_shmem_exit(print_lwlock_stats, 0);
}
@@ -144,30 +151,58 @@ init_lwlock_stats(void)
static void
print_lwlock_stats(int code, Datum arg)
{
- int i;
- int *LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
- int numLocks = LWLockCounter[1];
+ HASH_SEQ_STATUS scan;
+ lwlock_stats *lwstats;
+
+ hash_seq_init(&scan, lwlock_stats_htab);
/* Grab an LWLock to keep different backends from mixing reports */
- LWLockAcquire(0, LW_EXCLUSIVE);
+ LWLockAcquire(&MainLWLockArray[0].lock, LW_EXCLUSIVE);
- for (i = 0; i < numLocks; i++)
+ while ((lwstats = (lwlock_stats *) hash_seq_search(&scan)) != NULL)
{
- if (sh_acquire_counts[i] || ex_acquire_counts[i] || block_counts[i] || spin_delay_counts[i])
- fprintf(stderr, "PID %d lwlock %d: shacq %u exacq %u blk %u spindelay %u\n",
- MyProcPid, i, sh_acquire_counts[i], ex_acquire_counts[i],
- block_counts[i], spin_delay_counts[i]);
+ fprintf(stderr,
+ "PID %d lwlock %s %d: shacq %u exacq %u blk %u spindelay %u\n",
+ MyProcPid, LWLockTrancheArray[lwstats->key.tranche]->name,
+ lwstats->key.instance, lwstats->sh_acquire_count,
+ lwstats->ex_acquire_count, lwstats->block_count,
+ lwstats->spin_delay_count);
}
- LWLockRelease(0);
+ LWLockRelease(&MainLWLockArray[0].lock);
+}
+
+static lwlock_stats *
+get_lwlock_stats_entry(LWLock *lock)
+{
+ lwlock_stats_key key;
+ lwlock_stats *lwstats;
+ bool found;
+
+ /* Set up local count state first time through in a given process */
+ if (counts_for_pid != MyProcPid)
+ init_lwlock_stats();
+
+ /* Fetch or create the entry. */
+ key.tranche = lock->tranche;
+ key.instance = T_ID(lock);
+ lwstats = hash_search(lwlock_stats_htab, &key, HASH_ENTER, &found);
+ if (!found)
+ {
+ lwstats->sh_acquire_count = 0;
+ lwstats->ex_acquire_count = 0;
+ lwstats->block_count = 0;
+ lwstats->spin_delay_count = 0;
+ }
+ return lwstats;
}
#endif /* LWLOCK_STATS */
/*
- * Compute number of LWLocks to allocate.
+ * Compute number of LWLocks to allocate in the main array.
*/
-int
+static int
NumLWLocks(void)
{
int numLocks;
@@ -180,7 +215,7 @@ NumLWLocks(void)
*/
/* Predefined LWLocks */
- numLocks = (int) NumFixedLWLocks;
+ numLocks = NUM_FIXED_LWLOCKS;
/* bufmgr.c needs two for each shared buffer */
numLocks += 2 * NBuffers;
@@ -248,56 +283,67 @@ LWLockShmemSize(void)
size = mul_size(numLocks, sizeof(LWLockPadded));
/* Space for dynamic allocation counter, plus room for alignment. */
- size = add_size(size, 2 * sizeof(int) + LWLOCK_PADDED_SIZE);
+ size = add_size(size, 3 * sizeof(int) + LWLOCK_PADDED_SIZE);
return size;
}
/*
- * Allocate shmem space for LWLocks and initialize the locks.
+ * Allocate shmem space for the main LWLock array and initialize it. We also
+ * register the main tranche here.
*/
void
CreateLWLocks(void)
{
- int numLocks = NumLWLocks();
- Size spaceLocks = LWLockShmemSize();
- LWLockPadded *lock;
- int *LWLockCounter;
- char *ptr;
- int id;
-
- /* Allocate space */
- ptr = (char *) ShmemAlloc(spaceLocks);
-
- /* Leave room for dynamic allocation counter */
- ptr += 2 * sizeof(int);
-
- /* Ensure desired alignment of LWLock array */
- ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
-
- LWLockArray = (LWLockPadded *) ptr;
-
- /*
- * Initialize all LWLocks to "unlocked" state
- */
- for (id = 0, lock = LWLockArray; id < numLocks; id++, lock++)
+ if (!IsUnderPostmaster)
{
- SpinLockInit(&lock->lock.mutex);
- lock->lock.releaseOK = true;
- lock->lock.exclusive = 0;
- lock->lock.shared = 0;
- lock->lock.head = NULL;
- lock->lock.tail = NULL;
+ int numLocks = NumLWLocks();
+ Size spaceLocks = LWLockShmemSize();
+ LWLockPadded *lock;
+ int *LWLockCounter;
+ char *ptr;
+ int id;
+
+ /* Allocate space */
+ ptr = (char *) ShmemAlloc(spaceLocks);
+
+ /* Leave room for dynamic allocation of locks and tranches */
+ ptr += 3 * sizeof(int);
+
+ /* Ensure desired alignment of LWLock array */
+ ptr += LWLOCK_PADDED_SIZE - ((uintptr_t) ptr) % LWLOCK_PADDED_SIZE;
+
+ MainLWLockArray = (LWLockPadded *) ptr;
+
+ /* Initialize all LWLocks in main array */
+ for (id = 0, lock = MainLWLockArray; id < numLocks; id++, lock++)
+ LWLockInitialize(&lock->lock, 0);
+
+ /*
+ * Initialize the dynamic-allocation counters, which are stored just
+ * before the first LWLock. LWLockCounter[0] is the allocation
+ * counter for lwlocks, LWLockCounter[1] is the maximum number that
+ * can be allocated from the main array, and LWLockCounter[2] is the
+ * allocation counter for tranches.
+ */
+ LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
+ LWLockCounter[0] = NUM_FIXED_LWLOCKS;
+ LWLockCounter[1] = numLocks;
+ LWLockCounter[2] = 1; /* 0 is the main array */
}
- /*
- * Initialize the dynamic-allocation counter, which is stored just before
- * the first LWLock.
- */
- LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
- LWLockCounter[0] = (int) NumFixedLWLocks;
- LWLockCounter[1] = numLocks;
+ if (LWLockTrancheArray == NULL)
+ {
+ LWLockTranchesAllocated = 16;
+ LWLockTrancheArray = MemoryContextAlloc(TopMemoryContext,
+ LWLockTranchesAllocated * sizeof(LWLockTranche *));
+ }
+
+ MainLWLockTranche.name = "main";
+ MainLWLockTranche.array_base = MainLWLockArray;
+ MainLWLockTranche.array_stride = sizeof(LWLockPadded);
+ LWLockRegisterTranche(0, &MainLWLockTranche);
}
@@ -309,26 +355,86 @@ CreateLWLocks(void)
* startup, but it is needed if any user-defined code tries to allocate
* LWLocks after startup.
*/
-LWLockId
+LWLock *
LWLockAssign(void)
{
- LWLockId result;
+ LWLock *result;
/* use volatile pointer to prevent code rearrangement */
volatile int *LWLockCounter;
- LWLockCounter = (int *) ((char *) LWLockArray - 2 * sizeof(int));
+ LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
SpinLockAcquire(ShmemLock);
if (LWLockCounter[0] >= LWLockCounter[1])
{
SpinLockRelease(ShmemLock);
- elog(ERROR, "no more LWLockIds available");
+ elog(ERROR, "no more LWLocks available");
}
- result = (LWLockId) (LWLockCounter[0]++);
+ result = &MainLWLockArray[LWLockCounter[0]++].lock;
SpinLockRelease(ShmemLock);
return result;
}
+/*
+ * Allocate a new tranche ID.
+ */
+int
+LWLockNewTrancheId(void)
+{
+ int result;
+
+ /* use volatile pointer to prevent code rearrangement */
+ volatile int *LWLockCounter;
+
+ LWLockCounter = (int *) ((char *) MainLWLockArray - 3 * sizeof(int));
+ SpinLockAcquire(ShmemLock);
+ result = LWLockCounter[2]++;
+ SpinLockRelease(ShmemLock);
+
+ return result;
+}
+
+/*
+ * Register a tranche ID in the lookup table for the current process. This
+ * routine will save a pointer to the tranche object passed as an argument,
+ * so that object should be allocated in a backend-lifetime context
+ * (TopMemoryContext, static variable, or similar).
+ */
+void
+LWLockRegisterTranche(int tranche_id, LWLockTranche *tranche)
+{
+ Assert(LWLockTrancheArray != NULL);
+
+ if (tranche_id >= LWLockTranchesAllocated)
+ {
+ int i = LWLockTranchesAllocated;
+
+ while (i <= tranche_id)
+ i *= 2;
+
+ LWLockTrancheArray = repalloc(LWLockTrancheArray,
+ i * sizeof(LWLockTranche *));
+ LWLockTranchesAllocated = i;
+ }
+
+ LWLockTrancheArray[tranche_id] = tranche;
+}
+
+/*
+ * LWLockInitialize - initialize a new lwlock; it's initially unlocked
+ */
+void
+LWLockInitialize(LWLock *lock, int tranche_id)
+{
+ SpinLockInit(&lock->mutex);
+ lock->releaseOK = true;
+ lock->exclusive = 0;
+ lock->shared = 0;
+ lock->tranche = tranche_id;
+ lock->head = NULL;
+ lock->tail = NULL;
+}
+
/*
* LWLockAcquire - acquire a lightweight lock in the specified mode
@@ -338,24 +444,26 @@ LWLockAssign(void)
* Side effect: cancel/die interrupts are held off until lock release.
*/
void
-LWLockAcquire(LWLockId lockid, LWLockMode mode)
+LWLockAcquire(LWLock *l, LWLockMode mode)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
PGPROC *proc = MyProc;
bool retry = false;
int extraWaits = 0;
+#ifdef LWLOCK_STATS
+ lwlock_stats *lwstats;
+#endif
- PRINT_LWDEBUG("LWLockAcquire", lockid, lock);
+ PRINT_LWDEBUG("LWLockAcquire", lock);
#ifdef LWLOCK_STATS
- /* Set up local count state first time through in a given process */
- if (counts_for_pid != MyProcPid)
- init_lwlock_stats();
+ lwstats = get_lwlock_stats_entry(l);
+
/* Count lock acquisition attempts */
if (mode == LW_EXCLUSIVE)
- ex_acquire_counts[lockid]++;
+ lwstats->ex_acquire_count++;
else
- sh_acquire_counts[lockid]++;
+ lwstats->sh_acquire_count++;
#endif /* LWLOCK_STATS */
/*
@@ -398,7 +506,7 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
/* Acquire mutex. Time spent holding mutex should be short! */
#ifdef LWLOCK_STATS
- spin_delay_counts[lockid] += SpinLockAcquire(&lock->mutex);
+ lwstats->spin_delay_count += SpinLockAcquire(&lock->mutex);
#else
SpinLockAcquire(&lock->mutex);
#endif
@@ -466,13 +574,13 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
* so that the lock manager or signal manager will see the received
* signal when it next waits.
*/
- LOG_LWDEBUG("LWLockAcquire", lockid, "waiting");
+ LOG_LWDEBUG("LWLockAcquire", T_NAME(l), T_ID(l), "waiting");
#ifdef LWLOCK_STATS
- block_counts[lockid]++;
+ lwstats->block_count++;
#endif
- TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode);
for (;;)
{
@@ -483,9 +591,9 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
extraWaits++;
}
- TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode);
- LOG_LWDEBUG("LWLockAcquire", lockid, "awakened");
+ LOG_LWDEBUG("LWLockAcquire", T_NAME(l), T_ID(l), "awakened");
/* Now loop back and try to acquire lock again. */
retry = true;
@@ -494,10 +602,10 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
/* We are done updating shared state of the lock itself. */
SpinLockRelease(&lock->mutex);
- TRACE_POSTGRESQL_LWLOCK_ACQUIRE(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(l), T_ID(l), mode);
/* Add lock to list of locks held by this backend */
- held_lwlocks[num_held_lwlocks++] = lockid;
+ held_lwlocks[num_held_lwlocks++] = l;
/*
* Fix the process wait semaphore's count for any absorbed wakeups.
@@ -514,12 +622,12 @@ LWLockAcquire(LWLockId lockid, LWLockMode mode)
* If successful, cancel/die interrupts are held off until lock release.
*/
bool
-LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
+LWLockConditionalAcquire(LWLock *l, LWLockMode mode)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
bool mustwait;
- PRINT_LWDEBUG("LWLockConditionalAcquire", lockid, lock);
+ PRINT_LWDEBUG("LWLockConditionalAcquire", lock);
/* Ensure we will have room to remember the lock */
if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
@@ -564,14 +672,14 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
{
/* Failed to get lock, so release interrupt holdoff */
RESUME_INTERRUPTS();
- LOG_LWDEBUG("LWLockConditionalAcquire", lockid, "failed");
- TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(lockid, mode);
+ LOG_LWDEBUG("LWLockConditionalAcquire", T_NAME(l), T_ID(l), "failed");
+ TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE_FAIL(T_NAME(l), T_ID(l), mode);
}
else
{
/* Add lock to list of locks held by this backend */
- held_lwlocks[num_held_lwlocks++] = lockid;
- TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(lockid, mode);
+ held_lwlocks[num_held_lwlocks++] = l;
+ TRACE_POSTGRESQL_LWLOCK_CONDACQUIRE(T_NAME(l), T_ID(l), mode);
}
return !mustwait;
@@ -592,19 +700,20 @@ LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode)
* wake up, observe that their records have already been flushed, and return.
*/
bool
-LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
+LWLockAcquireOrWait(LWLock *l, LWLockMode mode)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
PGPROC *proc = MyProc;
bool mustwait;
int extraWaits = 0;
+#ifdef LWLOCK_STATS
+ lwlock_stats *lwstats;
+#endif
- PRINT_LWDEBUG("LWLockAcquireOrWait", lockid, lock);
+ PRINT_LWDEBUG("LWLockAcquireOrWait", lock);
#ifdef LWLOCK_STATS
- /* Set up local count state first time through in a given process */
- if (counts_for_pid != MyProcPid)
- init_lwlock_stats();
+ lwstats = get_lwlock_stats_entry(l);
#endif
/* Ensure we will have room to remember the lock */
@@ -671,13 +780,13 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
* Wait until awakened. Like in LWLockAcquire, be prepared for bogus
* wakups, because we share the semaphore with ProcWaitForSignal.
*/
- LOG_LWDEBUG("LWLockAcquireOrWait", lockid, "waiting");
+ LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "waiting");
#ifdef LWLOCK_STATS
- block_counts[lockid]++;
+ lwstats->block_count++;
#endif
- TRACE_POSTGRESQL_LWLOCK_WAIT_START(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(l), T_ID(l), mode);
for (;;)
{
@@ -688,9 +797,9 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
extraWaits++;
}
- TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(lockid, mode);
+ TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(l), T_ID(l), mode);
- LOG_LWDEBUG("LWLockAcquireOrWait", lockid, "awakened");
+ LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "awakened");
}
else
{
@@ -708,14 +817,14 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
{
/* Failed to get lock, so release interrupt holdoff */
RESUME_INTERRUPTS();
- LOG_LWDEBUG("LWLockAcquireOrWait", lockid, "failed");
- TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE_FAIL(lockid, mode);
+ LOG_LWDEBUG("LWLockAcquireOrWait", T_NAME(l), T_ID(l), "failed");
+ TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE_FAIL(T_NAME(l), T_ID(l), mode);
}
else
{
/* Add lock to list of locks held by this backend */
- held_lwlocks[num_held_lwlocks++] = lockid;
- TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE(lockid, mode);
+ held_lwlocks[num_held_lwlocks++] = l;
+ TRACE_POSTGRESQL_LWLOCK_WAIT_UNTIL_FREE(T_NAME(l), T_ID(l), mode);
}
return !mustwait;
@@ -725,14 +834,14 @@ LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode)
* LWLockRelease - release a previously acquired lock
*/
void
-LWLockRelease(LWLockId lockid)
+LWLockRelease(LWLock *l)
{
- volatile LWLock *lock = &(LWLockArray[lockid].lock);
+ volatile LWLock *lock = l;
PGPROC *head;
PGPROC *proc;
int i;
- PRINT_LWDEBUG("LWLockRelease", lockid, lock);
+ PRINT_LWDEBUG("LWLockRelease", lock);
/*
* Remove lock from list of locks held. Usually, but not always, it will
@@ -740,11 +849,11 @@ LWLockRelease(LWLockId lockid)
*/
for (i = num_held_lwlocks; --i >= 0;)
{
- if (lockid == held_lwlocks[i])
+ if (l == held_lwlocks[i])
break;
}
if (i < 0)
- elog(ERROR, "lock %d is not held", (int) lockid);
+ elog(ERROR, "lock %s %d is not held", T_NAME(l), T_ID(l));
num_held_lwlocks--;
for (; i < num_held_lwlocks; i++)
held_lwlocks[i] = held_lwlocks[i + 1];
@@ -824,14 +933,14 @@ LWLockRelease(LWLockId lockid)
/* We are done updating shared state of the lock itself. */
SpinLockRelease(&lock->mutex);
- TRACE_POSTGRESQL_LWLOCK_RELEASE(lockid);
+ TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(l), T_ID(l));
/*
* Awaken any waiters I removed from the queue.
*/
while (head != NULL)
{
- LOG_LWDEBUG("LWLockRelease", lockid, "release waiter");
+ LOG_LWDEBUG("LWLockRelease", T_NAME(l), T_ID(l), "release waiter");
proc = head;
head = proc->lwWaitLink;
proc->lwWaitLink = NULL;
@@ -874,13 +983,13 @@ LWLockReleaseAll(void)
* lock is held shared or exclusive.
*/
bool
-LWLockHeldByMe(LWLockId lockid)
+LWLockHeldByMe(LWLock *l)
{
int i;
for (i = 0; i < num_held_lwlocks; i++)
{
- if (held_lwlocks[i] == lockid)
+ if (held_lwlocks[i] == l)
return true;
}
return false;
diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c
index e7f44cce84..6700072027 100644
--- a/src/backend/storage/lmgr/predicate.c
+++ b/src/backend/storage/lmgr/predicate.c
@@ -241,7 +241,10 @@
#define PredicateLockHashPartition(hashcode) \
((hashcode) % NUM_PREDICATELOCK_PARTITIONS)
#define PredicateLockHashPartitionLock(hashcode) \
- ((LWLockId) (FirstPredicateLockMgrLock + PredicateLockHashPartition(hashcode)))
+ (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + \
+ PredicateLockHashPartition(hashcode)].lock)
+#define PredicateLockHashPartitionLockByIndex(i) \
+ (&MainLWLockArray[PREDICATELOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
#define NPREDICATELOCKTARGETENTS() \
mul_size(max_predicate_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))
@@ -383,7 +386,7 @@ static SHM_QUEUE *FinishedSerializableTransactions;
*/
static const PREDICATELOCKTARGETTAG ScratchTargetTag = {0, 0, 0, 0};
static uint32 ScratchTargetTagHash;
-static int ScratchPartitionLock;
+static LWLock *ScratchPartitionLock;
/*
* The local hash table used to determine when to combine multiple fine-
@@ -1398,7 +1401,7 @@ GetPredicateLockStatusData(void)
* in ascending order, then SerializableXactHashLock.
*/
for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
- LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
LWLockAcquire(SerializableXactHashLock, LW_SHARED);
/* Get number of locks and allocate appropriately-sized arrays. */
@@ -1427,7 +1430,7 @@ GetPredicateLockStatusData(void)
/* Release locks in reverse order */
LWLockRelease(SerializableXactHashLock);
for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
- LWLockRelease(FirstPredicateLockMgrLock + i);
+ LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
return data;
}
@@ -1856,7 +1859,7 @@ PageIsPredicateLocked(Relation relation, BlockNumber blkno)
{
PREDICATELOCKTARGETTAG targettag;
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
PREDICATELOCKTARGET *target;
SET_PREDICATELOCKTARGETTAG_PAGE(targettag,
@@ -2089,7 +2092,7 @@ DeleteChildTargetLocks(const PREDICATELOCKTARGETTAG *newtargettag)
if (TargetTagIsCoveredBy(oldtargettag, *newtargettag))
{
uint32 oldtargettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
PREDICATELOCK *rmpredlock PG_USED_FOR_ASSERTS_ONLY;
oldtargettaghash = PredicateLockTargetTagHashCode(&oldtargettag);
@@ -2301,7 +2304,7 @@ CreatePredicateLock(const PREDICATELOCKTARGETTAG *targettag,
PREDICATELOCKTARGET *target;
PREDICATELOCKTAG locktag;
PREDICATELOCK *lock;
- LWLockId partitionLock;
+ LWLock *partitionLock;
bool found;
partitionLock = PredicateLockHashPartitionLock(targettaghash);
@@ -2599,10 +2602,10 @@ TransferPredicateLocksToNewTarget(PREDICATELOCKTARGETTAG oldtargettag,
bool removeOld)
{
uint32 oldtargettaghash;
- LWLockId oldpartitionLock;
+ LWLock *oldpartitionLock;
PREDICATELOCKTARGET *oldtarget;
uint32 newtargettaghash;
- LWLockId newpartitionLock;
+ LWLock *newpartitionLock;
bool found;
bool outOfShmem = false;
@@ -2858,7 +2861,7 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer)
/* Acquire locks on all lock partitions */
LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
- LWLockAcquire(FirstPredicateLockMgrLock + i, LW_EXCLUSIVE);
+ LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
LWLockAcquire(SerializableXactHashLock, LW_EXCLUSIVE);
/*
@@ -2996,7 +2999,7 @@ DropAllPredicateLocksFromTable(Relation relation, bool transfer)
/* Release locks in reverse order */
LWLockRelease(SerializableXactHashLock);
for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
- LWLockRelease(FirstPredicateLockMgrLock + i);
+ LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
LWLockRelease(SerializablePredicateLockListLock);
}
@@ -3611,7 +3614,7 @@ ClearOldPredicateLocks(void)
PREDICATELOCKTARGET *target;
PREDICATELOCKTARGETTAG targettag;
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
tag = predlock->tag;
target = tag.myTarget;
@@ -3690,7 +3693,7 @@ ReleaseOneSerializableXact(SERIALIZABLEXACT *sxact, bool partial,
PREDICATELOCKTARGET *target;
PREDICATELOCKTARGETTAG targettag;
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
nextpredlock = (PREDICATELOCK *)
SHMQueueNext(&(sxact->predicateLocks),
@@ -4068,7 +4071,7 @@ static void
CheckTargetForConflictsIn(PREDICATELOCKTARGETTAG *targettag)
{
uint32 targettaghash;
- LWLockId partitionLock;
+ LWLock *partitionLock;
PREDICATELOCKTARGET *target;
PREDICATELOCK *predlock;
PREDICATELOCK *mypredlock = NULL;
@@ -4360,7 +4363,7 @@ CheckTableForSerializableConflictIn(Relation relation)
LWLockAcquire(SerializablePredicateLockListLock, LW_EXCLUSIVE);
for (i = 0; i < NUM_PREDICATELOCK_PARTITIONS; i++)
- LWLockAcquire(FirstPredicateLockMgrLock + i, LW_SHARED);
+ LWLockAcquire(PredicateLockHashPartitionLockByIndex(i), LW_SHARED);
LWLockAcquire(SerializableXactHashLock, LW_SHARED);
/* Scan through target list */
@@ -4407,7 +4410,7 @@ CheckTableForSerializableConflictIn(Relation relation)
/* Release locks in reverse order */
LWLockRelease(SerializableXactHashLock);
for (i = NUM_PREDICATELOCK_PARTITIONS - 1; i >= 0; i--)
- LWLockRelease(FirstPredicateLockMgrLock + i);
+ LWLockRelease(PredicateLockHashPartitionLockByIndex(i));
LWLockRelease(SerializablePredicateLockListLock);
}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index ee6c24cea7..1a683b8336 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -189,7 +189,8 @@ InitProcGlobal(void)
*/
procs = (PGPROC *) ShmemAlloc(TotalProcs * sizeof(PGPROC));
ProcGlobal->allProcs = procs;
- ProcGlobal->allProcCount = TotalProcs;
+ /* XXX allProcCount isn't really all of them; it excludes prepared xacts */
+ ProcGlobal->allProcCount = MaxBackends + NUM_AUXILIARY_PROCS;
if (!procs)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -663,7 +664,7 @@ IsWaitingForLock(void)
void
LockErrorCleanup(void)
{
- LWLockId partitionLock;
+ LWLock *partitionLock;
DisableTimeoutParams timeouts[2];
AbortStrongLockAcquire();
@@ -942,7 +943,7 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
LOCK *lock = locallock->lock;
PROCLOCK *proclock = locallock->proclock;
uint32 hashcode = locallock->hashcode;
- LWLockId partitionLock = LockHashPartitionLock(hashcode);
+ LWLock *partitionLock = LockHashPartitionLock(hashcode);
PROC_QUEUE *waitQueue = &(lock->waitProcs);
LOCKMASK myHeldLocks = MyProc->heldLocks;
bool early_deadlock = false;
@@ -1440,7 +1441,7 @@ CheckDeadLock(void)
* interrupts.
*/
for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
- LWLockAcquire(FirstLockMgrLock + i, LW_EXCLUSIVE);
+ LWLockAcquire(LockHashPartitionLockByIndex(i), LW_EXCLUSIVE);
/*
* Check to see if we've been awoken by anyone in the interim.
@@ -1522,7 +1523,7 @@ CheckDeadLock(void)
*/
check_done:
for (i = NUM_LOCK_PARTITIONS; --i >= 0;)
- LWLockRelease(FirstLockMgrLock + i);
+ LWLockRelease(LockHashPartitionLockByIndex(i));
}
diff --git a/src/backend/utils/probes.d b/src/backend/utils/probes.d
index 17c8e15f35..804ba6ae62 100644
--- a/src/backend/utils/probes.d
+++ b/src/backend/utils/probes.d
@@ -15,7 +15,6 @@
* in probe definitions, as they cause compilation errors on Mac OS X 10.5.
*/
#define LocalTransactionId unsigned int
-#define LWLockId int
#define LWLockMode int
#define LOCKMODE int
#define BlockNumber unsigned int
@@ -29,14 +28,14 @@ provider postgresql {
probe transaction__commit(LocalTransactionId);
probe transaction__abort(LocalTransactionId);
- probe lwlock__acquire(LWLockId, LWLockMode);
- probe lwlock__release(LWLockId);
- probe lwlock__wait__start(LWLockId, LWLockMode);
- probe lwlock__wait__done(LWLockId, LWLockMode);
- probe lwlock__condacquire(LWLockId, LWLockMode);
- probe lwlock__condacquire__fail(LWLockId, LWLockMode);
- probe lwlock__wait__until__free(LWLockId, LWLockMode);
- probe lwlock__wait__until__free__fail(LWLockId, LWLockMode);
+ probe lwlock__acquire(const char *, int, LWLockMode);
+ probe lwlock__release(const char *, int);
+ probe lwlock__wait__start(const char *, int, LWLockMode);
+ probe lwlock__wait__done(const char *, int, LWLockMode);
+ probe lwlock__condacquire(const char *, int, LWLockMode);
+ probe lwlock__condacquire__fail(const char *, int, LWLockMode);
+ probe lwlock__wait__until__free(const char *, int, LWLockMode);
+ probe lwlock__wait__until__free__fail(const char *, int, LWLockMode);
probe lock__wait__start(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
probe lock__wait__done(unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, LOCKMODE);
diff --git a/src/include/access/slru.h b/src/include/access/slru.h
index 4ec11b1ec2..c7b4186ffa 100644
--- a/src/include/access/slru.h
+++ b/src/include/access/slru.h
@@ -55,7 +55,7 @@ typedef enum
*/
typedef struct SlruSharedData
{
- LWLockId ControlLock;
+ LWLock *ControlLock;
/* Number of buffers managed by this SLRU structure */
int num_slots;
@@ -69,7 +69,7 @@ typedef struct SlruSharedData
bool *page_dirty;
int *page_number;
int *page_lru_count;
- LWLockId *buffer_locks;
+ LWLock **buffer_locks;
/*
* Optional array of WAL flush LSNs associated with entries in the SLRU
@@ -136,7 +136,7 @@ typedef SlruCtlData *SlruCtl;
extern Size SimpleLruShmemSize(int nslots, int nlsns);
extern void SimpleLruInit(SlruCtl ctl, const char *name, int nslots, int nlsns,
- LWLockId ctllock, const char *subdir);
+ LWLock *ctllock, const char *subdir);
extern int SimpleLruZeroPage(SlruCtl ctl, int pageno);
extern int SimpleLruReadPage(SlruCtl ctl, int pageno, bool write_ok,
TransactionId xid);
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index 457390fc87..93a0030c3e 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -104,7 +104,10 @@ typedef struct buftag
#define BufTableHashPartition(hashcode) \
((hashcode) % NUM_BUFFER_PARTITIONS)
#define BufMappingPartitionLock(hashcode) \
- ((LWLockId) (FirstBufMappingLock + BufTableHashPartition(hashcode)))
+ (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + \
+ BufTableHashPartition(hashcode)].lock)
+#define BufMappingPartitionLockByIndex(i) \
+ (&MainLWLockArray[BUFFER_MAPPING_LWLOCK_OFFSET + (i)].lock)
/*
* BufferDesc -- shared descriptor/state data for a single shared buffer.
@@ -144,8 +147,8 @@ typedef struct sbufdesc
int buf_id; /* buffer's index number (from 0) */
int freeNext; /* link in freelist chain */
- LWLockId io_in_progress_lock; /* to wait for I/O to complete */
- LWLockId content_lock; /* to lock access to buffer contents */
+ LWLock *io_in_progress_lock; /* to wait for I/O to complete */
+ LWLock *content_lock; /* to lock access to buffer contents */
} BufferDesc;
#define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index f6a2029e2a..ceeab9fc8a 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -483,8 +483,10 @@ typedef enum
#define LockHashPartition(hashcode) \
((hashcode) % NUM_LOCK_PARTITIONS)
#define LockHashPartitionLock(hashcode) \
- ((LWLockId) (FirstLockMgrLock + LockHashPartition(hashcode)))
-
+ (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + \
+ LockHashPartition(hashcode)].lock)
+#define LockHashPartitionLockByIndex(i) \
+ (&MainLWLockArray[LOCK_MANAGER_LWLOCK_OFFSET + (i)].lock)
/*
* function prototypes
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index efdb8b5faf..4507926274 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -14,10 +14,123 @@
#ifndef LWLOCK_H
#define LWLOCK_H
+#include "storage/s_lock.h"
+
+struct PGPROC;
+
+/*
+ * It's occasionally necessary to identify a particular LWLock "by name"; e.g.
+ * because we wish to report the lock to dtrace. We could store a name or
+ * other identifying information in the lock itself, but since it's common
+ * to have many nearly-identical locks (e.g. one per buffer) this would end
+ * up wasting significant amounts of memory. Instead, each lwlock stores a
+ * tranche ID which tells us which array it's part of. Based on that, we can
+ * figure out where the lwlock lies within the array using the data structure
+ * shown below; the lock is then identified based on the tranche name and
+ * computed array index. We need the array stride because the array might not
+ * be an array of lwlocks, but rather some larger data structure that includes
+ * one or more lwlocks per element.
+ */
+typedef struct LWLockTranche
+{
+ const char *name;
+ void *array_base;
+ Size array_stride;
+} LWLockTranche;
+
+/*
+ * Code outside of lwlock.c should not manipulate the contents of this
+ * structure directly, but we have to declare it here to allow LWLocks to be
+ * incorporated into other data structures.
+ */
+typedef struct LWLock
+{
+ slock_t mutex; /* Protects LWLock and queue of PGPROCs */
+ bool releaseOK; /* T if ok to release waiters */
+ char exclusive; /* # of exclusive holders (0 or 1) */
+ int shared; /* # of shared holders (0..MaxBackends) */
+ int tranche; /* tranche ID */
+ struct PGPROC *head; /* head of list of waiting PGPROCs */
+ struct PGPROC *tail; /* tail of list of waiting PGPROCs */
+ /* tail is undefined when head is NULL */
+} LWLock;
+
+/*
+ * Prior to PostgreSQL 9.4, every lightweight lock in the system was stored
+ * in a single array. For convenience and for compatibility with past
+ * releases, we still have a main array, but it's now also permissible to
+ * store LWLocks elsewhere in the main shared memory segment or in a dynamic
+ * shared memory segment. In the main array, we force the array stride to
+ * be a power of 2, which saves a few cycles in indexing, but more importantly
+ * also ensures that individual LWLocks don't cross cache line boundaries.
+ * This reduces cache contention problems, especially on AMD Opterons.
+ * (Of course, we have to also ensure that the array start address is suitably
+ * aligned.)
+ *
+ * Even on a 32-bit platform, an lwlock will be more than 16 bytes, because
+ * it contains 2 integers and 2 pointers, plus other stuff. It should fit
+ * into 32 bytes, though, unless slock_t is really big. On a 64-bit platform,
+ * it should fit into 32 bytes unless slock_t is larger than 4 bytes. We
+ * allow for that just in case.
+ */
+#define LWLOCK_PADDED_SIZE (sizeof(LWLock) <= 32 ? 32 : 64)
+
+typedef union LWLockPadded
+{
+ LWLock lock;
+ char pad[LWLOCK_PADDED_SIZE];
+} LWLockPadded;
+extern LWLockPadded *MainLWLockArray;
+
+/*
+ * Some commonly-used locks have predefined positions within MainLWLockArray;
+ * defining macros here makes it much easier to keep track of these. If you
+ * add a lock, add it to the end to avoid renumbering the existing locks;
+ * if you remove a lock, consider leaving a gap in the numbering sequence for
+ * the benefit of DTrace and other external debugging scripts.
+ */
+#define BufFreelistLock (&MainLWLockArray[0].lock)
+#define ShmemIndexLock (&MainLWLockArray[1].lock)
+#define OidGenLock (&MainLWLockArray[2].lock)
+#define XidGenLock (&MainLWLockArray[3].lock)
+#define ProcArrayLock (&MainLWLockArray[4].lock)
+#define SInvalReadLock (&MainLWLockArray[5].lock)
+#define SInvalWriteLock (&MainLWLockArray[6].lock)
+#define WALBufMappingLock (&MainLWLockArray[7].lock)
+#define WALWriteLock (&MainLWLockArray[8].lock)
+#define ControlFileLock (&MainLWLockArray[9].lock)
+#define CheckpointLock (&MainLWLockArray[10].lock)
+#define CLogControlLock (&MainLWLockArray[11].lock)
+#define SubtransControlLock (&MainLWLockArray[12].lock)
+#define MultiXactGenLock (&MainLWLockArray[13].lock)
+#define MultiXactOffsetControlLock (&MainLWLockArray[14].lock)
+#define MultiXactMemberControlLock (&MainLWLockArray[15].lock)
+#define RelCacheInitLock (&MainLWLockArray[16].lock)
+#define CheckpointerCommLock (&MainLWLockArray[17].lock)
+#define TwoPhaseStateLock (&MainLWLockArray[18].lock)
+#define TablespaceCreateLock (&MainLWLockArray[19].lock)
+#define BtreeVacuumLock (&MainLWLockArray[20].lock)
+#define AddinShmemInitLock (&MainLWLockArray[21].lock)
+#define AutovacuumLock (&MainLWLockArray[22].lock)
+#define AutovacuumScheduleLock (&MainLWLockArray[23].lock)
+#define SyncScanLock (&MainLWLockArray[24].lock)
+#define RelationMappingLock (&MainLWLockArray[25].lock)
+#define AsyncCtlLock (&MainLWLockArray[26].lock)
+#define AsyncQueueLock (&MainLWLockArray[27].lock)
+#define SerializableXactHashLock (&MainLWLockArray[28].lock)
+#define SerializableFinishedListLock (&MainLWLockArray[29].lock)
+#define SerializablePredicateLockListLock (&MainLWLockArray[30].lock)
+#define OldSerXidLock (&MainLWLockArray[31].lock)
+#define SyncRepLock (&MainLWLockArray[32].lock)
+#define BackgroundWorkerLock (&MainLWLockArray[33].lock)
+#define DynamicSharedMemoryControlLock (&MainLWLockArray[34].lock)
+#define AutoFileLock (&MainLWLockArray[35].lock)
+#define NUM_INDIVIDUAL_LWLOCKS 36
+
/*
* It's a bit odd to declare NUM_BUFFER_PARTITIONS and NUM_LOCK_PARTITIONS
- * here, but we need them to set up enum LWLockId correctly, and having
- * this file include lock.h or bufmgr.h would be backwards.
+ * here, but we need them to figure out offsets within MainLWLockArray, and
+ * having this file include lock.h or bufmgr.h would be backwards.
*/
/* Number of partitions of the shared buffer mapping hashtable */
@@ -31,68 +144,14 @@
#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
-/*
- * We have a number of predefined LWLocks, plus a bunch of LWLocks that are
- * dynamically assigned (e.g., for shared buffers). The LWLock structures
- * live in shared memory (since they contain shared data) and are identified
- * by values of this enumerated type. We abuse the notion of an enum somewhat
- * by allowing values not listed in the enum declaration to be assigned.
- * The extra value MaxDynamicLWLock is there to keep the compiler from
- * deciding that the enum can be represented as char or short ...
- *
- * If you remove a lock, please replace it with a placeholder. This retains
- * the lock numbering, which is helpful for DTrace and other external
- * debugging scripts.
- */
-typedef enum LWLockId
-{
- BufFreelistLock,
- ShmemIndexLock,
- OidGenLock,
- XidGenLock,
- ProcArrayLock,
- SInvalReadLock,
- SInvalWriteLock,
- WALBufMappingLock,
- WALWriteLock,
- ControlFileLock,
- CheckpointLock,
- CLogControlLock,
- SubtransControlLock,
- MultiXactGenLock,
- MultiXactOffsetControlLock,
- MultiXactMemberControlLock,
- RelCacheInitLock,
- CheckpointerCommLock,
- TwoPhaseStateLock,
- TablespaceCreateLock,
- BtreeVacuumLock,
- AddinShmemInitLock,
- AutovacuumLock,
- AutovacuumScheduleLock,
- SyncScanLock,
- RelationMappingLock,
- AsyncCtlLock,
- AsyncQueueLock,
- SerializableXactHashLock,
- SerializableFinishedListLock,
- SerializablePredicateLockListLock,
- OldSerXidLock,
- SyncRepLock,
- BackgroundWorkerLock,
- DynamicSharedMemoryControlLock,
- AutoFileLock,
- /* Individual lock IDs end here */
- FirstBufMappingLock,
- FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
- FirstPredicateLockMgrLock = FirstLockMgrLock + NUM_LOCK_PARTITIONS,
-
- /* must be last except for MaxDynamicLWLock: */
- NumFixedLWLocks = FirstPredicateLockMgrLock + NUM_PREDICATELOCK_PARTITIONS,
-
- MaxDynamicLWLock = 1000000000
-} LWLockId;
-
+/* Offsets for various chunks of preallocated lwlocks. */
+#define BUFFER_MAPPING_LWLOCK_OFFSET NUM_INDIVIDUAL_LWLOCKS
+#define LOCK_MANAGER_LWLOCK_OFFSET \
+ (BUFFER_MAPPING_LWLOCK_OFFSET + NUM_BUFFER_PARTITIONS)
+#define PREDICATELOCK_MANAGER_LWLOCK_OFFSET \
+ (LOCK_MANAGER_LWLOCK_OFFSET + NUM_LOCK_PARTITIONS)
+#define NUM_FIXED_LWLOCKS \
+ (PREDICATELOCK_MANAGER_LWLOCK_OFFSET + NUM_PREDICATELOCK_PARTITIONS)
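+/*
+ * MainLWLockArray therefore holds the individual locks first, followed by
+ * the buffer-mapping, lock-manager, and predicate-lock-manager partition
+ * locks; slots at NUM_FIXED_LWLOCKS and beyond are handed out dynamically
+ * by LWLockAssign().
+ */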
typedef enum LWLockMode
{
@@ -108,18 +167,47 @@ typedef enum LWLockMode
extern bool Trace_lwlocks;
#endif
-extern LWLockId LWLockAssign(void);
-extern void LWLockAcquire(LWLockId lockid, LWLockMode mode);
-extern bool LWLockConditionalAcquire(LWLockId lockid, LWLockMode mode);
-extern bool LWLockAcquireOrWait(LWLockId lockid, LWLockMode mode);
-extern void LWLockRelease(LWLockId lockid);
+extern void LWLockAcquire(LWLock *lock, LWLockMode mode);
+extern bool LWLockConditionalAcquire(LWLock *lock, LWLockMode mode);
+extern bool LWLockAcquireOrWait(LWLock *lock, LWLockMode mode);
+extern void LWLockRelease(LWLock *lock);
extern void LWLockReleaseAll(void);
-extern bool LWLockHeldByMe(LWLockId lockid);
+extern bool LWLockHeldByMe(LWLock *lock);
-extern int NumLWLocks(void);
extern Size LWLockShmemSize(void);
extern void CreateLWLocks(void);
+/*
+ * The traditional method for obtaining an lwlock for use by an extension is
+ * to call RequestAddinLWLocks() during postmaster startup; this will reserve
+ * space for the indicated number of locks in MainLWLockArray. Subsequently,
+ * a lock can be allocated using LWLockAssign.
+ */
extern void RequestAddinLWLocks(int n);
+extern LWLock *LWLockAssign(void);
+
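For reference, a minimal sketch of this traditional pattern (not part of the patch; the extension name, state struct, and hook wiring are hypothetical, and module boilerplate such as PG_MODULE_MAGIC is omitted), essentially the approach used by contrib modules like pg_stat_statements:

/* my_extension.c -- hypothetical add-in using the main LWLock array */
#include "postgres.h"

#include "miscadmin.h"
#include "storage/ipc.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"

typedef struct MySharedState
{
	LWLock	   *lock;			/* protects counter */
	int64		counter;
} MySharedState;

static MySharedState *my_state = NULL;
static shmem_startup_hook_type prev_shmem_startup_hook = NULL;

static void my_shmem_startup(void);

void
_PG_init(void)
{
	if (!process_shared_preload_libraries_in_progress)
		return;

	/* Reserve shared memory and one slot in the main LWLock array. */
	RequestAddinShmemSpace(MAXALIGN(sizeof(MySharedState)));
	RequestAddinLWLocks(1);

	prev_shmem_startup_hook = shmem_startup_hook;
	shmem_startup_hook = my_shmem_startup;
}

static void
my_shmem_startup(void)
{
	bool		found;

	if (prev_shmem_startup_hook)
		prev_shmem_startup_hook();

	LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
	my_state = ShmemInitStruct("my_extension", sizeof(MySharedState), &found);
	if (!found)
	{
		my_state->lock = LWLockAssign();	/* an LWLock *, after this patch */
		my_state->counter = 0;
	}
	LWLockRelease(AddinShmemInitLock);
}
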
+/*
+ * There is another, more flexible method of obtaining lwlocks. First, call
+ * LWLockNewTrancheId just once to obtain a tranche ID; this allocates from
+ * a shared counter. Next, each individual process using the tranche should
+ * call LWLockRegisterTranche() to associate that tranche ID with appropriate
+ * metadata. Finally, LWLockInitialize should be called just once per lwlock,
+ * passing the tranche ID as an argument.
+ *
+ * It may seem strange that each process using the tranche must register it
+ * separately, but dynamic shared memory segments aren't guaranteed to be
+ * mapped at the same address in all coordinating backends, so storing the
+ * registration in the main shared memory segment wouldn't work for that case.
+ */
+extern int LWLockNewTrancheId(void);
+extern void LWLockRegisterTranche(int, LWLockTranche *);
+extern void LWLockInitialize(LWLock *, int tranche_id);
+
+/*
+ * Prior to PostgreSQL 9.4, we used an enum type called LWLockId to refer
+ * to LWLocks. New code should instead use LWLock *. However, for the
+ * convenience of third-party code, we include the following typedef.
+ */
+typedef LWLock *LWLockId;
#endif /* LWLOCK_H */
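
A comparable sketch of the new tranche interface, for LWLocks embedded in some other shared structure (the structure, names, and lock count below are hypothetical; allocating and exchanging the shared structure itself is not shown):

#include "postgres.h"

#include "storage/lwlock.h"

#define MY_NUM_LOCKS 8

typedef struct MySharedData
{
	int			lwlock_tranche_id;
	LWLock		locks[MY_NUM_LOCKS];
	/* ... other shared state protected by the locks ... */
} MySharedData;

/* Tranche metadata must survive for the life of the backend. */
static LWLockTranche my_tranche;

/* Run once, in whichever process creates the shared structure. */
static void
my_initialize(MySharedData *shared)
{
	int			i;

	shared->lwlock_tranche_id = LWLockNewTrancheId();
	for (i = 0; i < MY_NUM_LOCKS; i++)
		LWLockInitialize(&shared->locks[i], shared->lwlock_tranche_id);
}

/*
 * Run in every process that touches the locks (including the creator):
 * the tranche registration is backend-local, so each attaching process
 * must redo it with its own mapping of the shared structure.
 */
static void
my_attach(MySharedData *shared)
{
	my_tranche.name = "my_extension";
	my_tranche.array_base = shared->locks;
	my_tranche.array_stride = sizeof(LWLock);
	LWLockRegisterTranche(shared->lwlock_tranche_id, &my_tranche);
}

Once registered, these locks work with LWLockAcquire/LWLockRelease like any other LWLock, and they are reported under the tranche name by the DTrace probes and the LWLOCK_STATS output.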
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index acdc6788bc..a3cadd9a01 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -131,7 +131,7 @@ struct PGPROC
struct XidCache subxids; /* cache for subtransaction XIDs */
/* Per-backend LWLock. Protects fields below. */
- LWLockId backendLock; /* protects the fields below */
+ LWLock *backendLock; /* protects the fields below */
/* Lock manager data, recording fast-path locks taken by this backend. */
uint64 fpLockBits; /* lock modes held for each fast-path slot */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1f735b70b7..ad40735333 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -896,7 +896,6 @@ LPWSTR
LSEG
LVRelStats
LWLock
-LWLockId
LWLockMode
LWLockPadded
LabelProvider