- Fix a memory order problem with non-interlocked mutex release.

- Give kernel_lock its own cache line.
ad 2008-01-10 20:14:10 +00:00
parent a38086a9b8
commit e8532b7138
7 changed files with 96 additions and 60 deletions
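
The first bullet is about publication order. With non-interlocked mutex release, mutex_exit() clears the owner field with a plain store, so anything it depends on (here, which LWP a CPU reports as curlwp) must already be globally visible by the time that store lands. Publishing curlwp with an ordinary store can leave the update sitting in the store buffer; that is the memory order problem being fixed. A minimal, hypothetical C sketch of the idea follows (the names and the use of C11 atomics are for illustration only, not NetBSD code):

/*
 * Hypothetical illustration of the publication problem; names and the
 * use of C11 atomics are for this sketch only.
 */
#include <stdatomic.h>

struct lwp;

struct cpu_info {
	_Atomic(struct lwp *) ci_curlwp;	/* LWP running on this CPU */
};

/*
 * Risky: a relaxed store can linger in the store buffer, so a remote
 * CPU may observe a later mutex_exit() by the new LWP before it
 * observes the curlwp change.
 */
static inline void
cpu_set_curlwp_weak(struct cpu_info *ci, struct lwp *l)
{
	atomic_store_explicit(&ci->ci_curlwp, l, memory_order_relaxed);
}

/*
 * What the assembly changes below do in spirit: an atomic exchange.
 * On x86 an XCHG with a memory operand is implicitly LOCKed, so the
 * new value is globally visible before any later store from this CPU.
 */
static inline void
cpu_set_curlwp_strong(struct cpu_info *ci, struct lwp *l)
{
	(void)atomic_exchange_explicit(&ci->ci_curlwp, l,
	    memory_order_seq_cst);
}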

View File

@@ -1,4 +1,4 @@
/* $NetBSD: locore.S,v 1.38 2008/01/05 21:47:19 yamt Exp $ */
/* $NetBSD: locore.S,v 1.39 2008/01/10 20:14:10 ad Exp $ */
/*
* Copyright-o-rama!
@@ -904,8 +904,12 @@ ENTRY(cpu_switchto)
movq PCB_RSP(%r14),%rsp
movq PCB_RBP(%r14),%rbp
/* Set curlwp. */
movq %r12,CPUVAR(CURLWP)
/*
* Set curlwp. This must be globally visible in order to permit
* non-interlocked mutex release.
*/
movq %r12,%rcx
xchgq %rcx,CPUVAR(CURLWP)
/* Skip the rest if returning to a pinned LWP. */
testb %dl,%dl
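
Why the two-instruction sequence rather than a single store: xchgq takes a general register operand and overwrites it, so the new curlwp is first copied into the scratch register %rcx and then exchanged with CPUVAR(CURLWP); the old value left in %rcx is simply discarded. On x86, an XCHG with a memory operand carries an implicit LOCK, which is what makes the store immediately globally visible. A hedged C equivalent using a GCC builtin (illustrative only, not the kernel's interface):

/*
 * Illustrative only: on x86 this builtin typically compiles to a
 * single XCHG, i.e. an implicitly LOCKed swap, so the new pointer is
 * globally visible before any later store from this CPU.
 */
static inline void *
publish_ptr(void **slot, void *val)
{
	return __sync_lock_test_and_set(slot, val);
}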

View File

@@ -1,4 +1,4 @@
/* $NetBSD: spl.S,v 1.15 2007/12/21 19:18:14 dsl Exp $ */
/* $NetBSD: spl.S,v 1.16 2008/01/10 20:14:11 ad Exp $ */
/*
* Copyright (c) 2003 Wasabi Systems, Inc.
@@ -118,7 +118,7 @@ IDTVEC(softintr)
call _C_LABEL(softint_dispatch)/* run handlers */
movq L_ADDR(%r15),%rcx
movq PCB_RSP(%rcx),%rsp
movq %r15,CPUVAR(CURLWP)
xchgq %r15,CPUVAR(CURLWP) /* must be globally visible */
popq %r15 /* unwind switchframe */
addq $(5 * 8),%rsp
cli

View File

@@ -1,4 +1,4 @@
/* $NetBSD: locore.S,v 1.60 2008/01/04 15:55:32 yamt Exp $ */
/* $NetBSD: locore.S,v 1.61 2008/01/10 20:14:11 ad Exp $ */
/*
* Copyright-o-rama!
@@ -111,7 +111,7 @@
*/
#include <machine/asm.h>
__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.60 2008/01/04 15:55:32 yamt Exp $");
__KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.61 2008/01/10 20:14:11 ad Exp $");
#include "opt_compat_oldboot.h"
#include "opt_ddb.h"
@@ -755,8 +755,12 @@ ENTRY(cpu_switchto)
movl PCB_EBP(%ebx),%ebp
movl PCB_ESP(%ebx),%esp
/* Set curlwp. */
movl %edi,CPUVAR(CURLWP)
/*
* Set curlwp. This must be globally visible in order to permit
* non-interlocked mutex release.
*/
movl %edi,%ecx
xchgl %ecx,CPUVAR(CURLWP)
/* Skip the rest if returning to a pinned LWP. */
testl %edx,%edx

View File

@@ -1,4 +1,4 @@
/* $NetBSD: spl.S,v 1.24 2007/12/20 23:46:11 ad Exp $ */
/* $NetBSD: spl.S,v 1.25 2008/01/10 20:14:11 ad Exp $ */
/*
* Copyright (c) 1998, 2007 The NetBSD Foundation, Inc.
@@ -37,7 +37,7 @@
*/
#include <machine/asm.h>
__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.24 2007/12/20 23:46:11 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: spl.S,v 1.25 2008/01/10 20:14:11 ad Exp $");
#include "opt_vm86.h"
#include "opt_ddb.h"
@@ -293,7 +293,7 @@ IDTVEC(softintr)
addl $8,%esp
movl L_ADDR(%esi),%ecx
movl PCB_ESP(%ecx),%esp
movl %esi,CPUVAR(CURLWP)
xchgl %esi,CPUVAR(CURLWP) /* must be globally visible */
popl %edi /* unwind switchframe */
popl %esi
addl $8,%esp

View File

@@ -1,4 +1,4 @@
/* $NetBSD: locore.S,v 1.36 2008/01/04 15:55:34 yamt Exp $ */
/* $NetBSD: locore.S,v 1.37 2008/01/10 20:14:11 ad Exp $ */
/* NetBSD: locore.S,v 1.31 2004/08/26 10:12:33 junyoung Exp */
/*
@@ -665,8 +665,12 @@ switch_skipsave:
movl PCB_EBP(%ebx),%ebp
movl PCB_ESP(%ebx),%esp
/* Set curlwp. */
movl %edi,CPUVAR(CURLWP)
/*
* Set curlwp. This must be globally visible in order to permit
* non-interlocked mutex release.
*/
movl %edi,%ecx
xchgl %ecx,CPUVAR(CURLWP)
/*
* Restore the rest of newlwp's context.

View File

@@ -1,4 +1,4 @@
/* $NetBSD: kern_lock.c,v 1.131 2008/01/04 21:18:08 ad Exp $ */
/* $NetBSD: kern_lock.c,v 1.132 2008/01/10 20:14:12 ad Exp $ */
/*-
* Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.131 2008/01/04 21:18:08 ad Exp $");
__KERNEL_RCSID(0, "$NetBSD: kern_lock.c,v 1.132 2008/01/10 20:14:12 ad Exp $");
#include "opt_multiprocessor.h"
@@ -107,7 +107,9 @@ static int acquire(struct lock **, int *, int, int, int, uintptr_t);
int lock_debug_syslog = 0; /* defaults to printf, but can be patched */
bool kernel_lock_dodebug;
__cpu_simple_lock_t kernel_lock;
__cpu_simple_lock_t kernel_lock[CACHE_LINE_SIZE / sizeof(__cpu_simple_lock_t)]
__aligned(CACHE_LINE_SIZE);
#ifdef LOCKDEBUG
static lockops_t lockmgr_lockops = {
@@ -615,7 +617,7 @@ assert_sleepable(struct simplelock *interlock, const char *msg)
if (panicstr != NULL)
return;
LOCKDEBUG_BARRIER(&kernel_lock, 1);
LOCKDEBUG_BARRIER(kernel_lock, 1);
if (CURCPU_IDLE_P() && !cold) {
panic("assert_sleepable: idle");
}
@@ -636,7 +638,7 @@ assert_sleepable(struct simplelock *interlock, const char *msg)
*/
#define _KERNEL_LOCK_ABORT(msg) \
LOCKDEBUG_ABORT(&kernel_lock, &_kernel_lock_ops, __func__, msg)
LOCKDEBUG_ABORT(kernel_lock, &_kernel_lock_ops, __func__, msg)
#ifdef LOCKDEBUG
#define _KERNEL_LOCK_ASSERT(cond) \
@@ -663,8 +665,9 @@ void
kernel_lock_init(void)
{
__cpu_simple_lock_init(&kernel_lock);
kernel_lock_dodebug = LOCKDEBUG_ALLOC(&kernel_lock, &_kernel_lock_ops,
KASSERT(CACHE_LINE_SIZE >= sizeof(__cpu_simple_lock_t));
__cpu_simple_lock_init(kernel_lock);
kernel_lock_dodebug = LOCKDEBUG_ALLOC(kernel_lock, &_kernel_lock_ops,
RETURN_ADDRESS);
}
@@ -693,9 +696,7 @@ _kernel_lock(int nlocks, struct lwp *l)
LOCKSTAT_TIMER(spintime);
LOCKSTAT_FLAG(lsflag);
struct lwp *owant;
#ifdef LOCKDEBUG
u_int spins;
#endif
int s;
if (nlocks == 0)
@@ -705,70 +706,88 @@ _kernel_lock(int nlocks, struct lwp *l)
l = curlwp;
if (ci->ci_biglock_count != 0) {
_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(&kernel_lock));
_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
ci->ci_biglock_count += nlocks;
l->l_blcnt += nlocks;
return;
}
_KERNEL_LOCK_ASSERT(l->l_blcnt == 0);
LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, &kernel_lock, RETURN_ADDRESS,
LOCKDEBUG_WANTLOCK(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS,
0);
s = splvm();
if (__cpu_simple_lock_try(&kernel_lock)) {
if (__cpu_simple_lock_try(kernel_lock)) {
ci->ci_biglock_count = nlocks;
l->l_blcnt = nlocks;
LOCKDEBUG_LOCKED(kernel_lock_dodebug, &kernel_lock,
LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock,
RETURN_ADDRESS, 0);
splx(s);
return;
}
/*
* To remove the ordering constraint between adaptive mutexes
* and kernel_lock we must make it appear as if this thread is
* blocking. For non-interlocked mutex release, a store fence
* is required to ensure that the result of any mutex_exit()
* by the current LWP becomes visible on the bus before the set
* of ci->ci_biglock_wanted becomes visible.
*/
membar_producer();
owant = ci->ci_biglock_wanted;
ci->ci_biglock_wanted = l;
/*
* Spin until we acquire the lock. Once we have it, record the
* time spent with lockstat.
*/
LOCKSTAT_ENTER(lsflag);
LOCKSTAT_START_TIMER(lsflag, spintime);
/*
* Before setting ci_biglock_wanted we must post a store
* fence (see kern_mutex.c). This is accomplished by the
* __cpu_simple_lock_try() above.
*/
owant = ci->ci_biglock_wanted;
ci->ci_biglock_wanted = curlwp; /* XXXAD */
#ifdef LOCKDEBUG
spins = 0;
#endif
do {
splx(s);
while (__SIMPLELOCK_LOCKED_P(&kernel_lock)) {
#ifdef LOCKDEBUG
if (SPINLOCK_SPINOUT(spins))
while (__SIMPLELOCK_LOCKED_P(kernel_lock)) {
if (SPINLOCK_SPINOUT(spins)) {
_KERNEL_LOCK_ABORT("spinout");
#endif
}
SPINLOCK_BACKOFF_HOOK;
SPINLOCK_SPIN_HOOK;
}
(void)splvm();
} while (!__cpu_simple_lock_try(&kernel_lock));
s = splvm();
} while (!__cpu_simple_lock_try(kernel_lock));
ci->ci_biglock_wanted = owant;
ci->ci_biglock_count = nlocks;
l->l_blcnt = nlocks;
LOCKSTAT_STOP_TIMER(lsflag, spintime);
LOCKDEBUG_LOCKED(kernel_lock_dodebug, &kernel_lock, RETURN_ADDRESS, 0);
LOCKDEBUG_LOCKED(kernel_lock_dodebug, kernel_lock, RETURN_ADDRESS, 0);
if (owant == NULL) {
LOCKSTAT_EVENT_RA(lsflag, kernel_lock,
LB_KERNEL_LOCK | LB_SPIN, 1, spintime, RETURN_ADDRESS);
}
LOCKSTAT_EXIT(lsflag);
splx(s);
/*
* Again, another store fence is required (see kern_mutex.c).
* Now that we have kernel_lock, reset ci_biglock_wanted. This
* store must be unbuffered (immediately visible on the bus) in
* order for non-interlocked mutex release to work correctly.
* It must be visible before a mutex_exit() can execute on this
* processor.
*
* Note: only where CAS is available in hardware will this be
* an unbuffered write, but non-interlocked release cannot be
* done on CPUs without CAS in hardware.
*/
membar_producer();
if (owant == NULL) {
LOCKSTAT_EVENT(lsflag, &kernel_lock, LB_KERNEL_LOCK | LB_SPIN,
1, spintime);
}
LOCKSTAT_EXIT(lsflag);
(void)atomic_swap_ptr(&ci->ci_biglock_wanted, owant);
/*
* Issue a memory barrier as we have acquired a lock. This also
* prevents stores from a following mutex_exit() being reordered
* to occur before our store to ci_biglock_wanted above.
*/
membar_enter();
}
/*
@@ -795,7 +814,7 @@ _kernel_unlock(int nlocks, struct lwp *l, int *countp)
return;
}
_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(&kernel_lock));
_KERNEL_LOCK_ASSERT(__SIMPLELOCK_LOCKED_P(kernel_lock));
if (nlocks == 0)
nlocks = olocks;
@@ -809,10 +828,10 @@ _kernel_unlock(int nlocks, struct lwp *l, int *countp)
l->l_blcnt -= nlocks;
if (ci->ci_biglock_count == nlocks) {
s = splvm();
LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, &kernel_lock,
LOCKDEBUG_UNLOCKED(kernel_lock_dodebug, kernel_lock,
RETURN_ADDRESS, 0);
ci->ci_biglock_count = 0;
__cpu_simple_unlock(&kernel_lock);
__cpu_simple_unlock(kernel_lock);
splx(s);
} else
ci->ci_biglock_count -= nlocks;
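
The two new comments in _kernel_lock() describe a two-sided handshake with the adaptive mutex code in kern_mutex.c: membar_producer() makes every store the spinning LWP performed earlier (including the effect of any non-interlocked mutex_exit()) visible before ci_biglock_wanted is set, and once kernel_lock is held, atomic_swap_ptr() plus membar_enter() make the restored ci_biglock_wanted visible before any mutex_exit() issued while the lock is held. A much simplified, hedged C sketch of that pattern (structure and names are illustrative, not the kernel's):

/*
 * Much-simplified sketch of the ordering pattern described above;
 * field and function names are illustrative.
 */
#include <stdatomic.h>

struct cpu {
	_Atomic(void *) wanted;		/* stands in for ci_biglock_wanted */
};

/*
 * Spinner side: a release store plays the role of membar_producer(),
 * ordering every earlier store (e.g. a prior unlocked mutex release)
 * before the store that advertises "wanted".
 */
static void
advertise_wanted(struct cpu *ci, void *me)
{
	atomic_store_explicit(&ci->wanted, me, memory_order_release);
}

/*
 * After the big lock is acquired: clear "wanted" with a full-barrier
 * exchange, standing in for the atomic_swap_ptr() + membar_enter()
 * pair, so the clear is visible before any store made while holding
 * the lock.
 */
static void
retire_wanted(struct cpu *ci, void *old)
{
	(void)atomic_exchange_explicit(&ci->wanted, old,
	    memory_order_seq_cst);
}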

View File

@@ -1,4 +1,4 @@
/* $NetBSD: lock.h,v 1.80 2008/01/09 22:06:00 xtraeme Exp $ */
/* $NetBSD: lock.h,v 1.81 2008/01/10 20:14:12 ad Exp $ */
/*-
* Copyright (c) 1999, 2000, 2006, 2007 The NetBSD Foundation, Inc.
@@ -243,9 +243,14 @@ do { \
} while (/* CONSTCOND */ 0);
#define SPINLOCK_RUN_HOOK(count) ((count) >= SPINLOCK_BACKOFF_MAX)
#define SPINLOCK_SPINOUT(spins) ((spins)++ > 0x0fffffff)
extern __cpu_simple_lock_t kernel_lock;
#ifdef LOCKDEBUG
#define SPINLOCK_SPINOUT(spins) ((spins)++ > 0x0fffffff)
#else
#define SPINLOCK_SPINOUT(spins) ((void)(spins), 0)
#endif
extern __cpu_simple_lock_t kernel_lock[];
#endif /* _KERNEL */
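
The second bullet of the commit message is the kern_lock.c and lock.h change just above: kernel_lock grows from a single __cpu_simple_lock_t into a cache-line-sized, cache-line-aligned array of which only element 0 is ever used, so CPUs spinning on it cannot false-share the line with unrelated hot data (this is also why every &kernel_lock in kern_lock.c becomes plain kernel_lock, as the array decays to a pointer to its first element). The same lock.h hunk makes SPINLOCK_SPINOUT() a constant 0 unless LOCKDEBUG is defined, matching the removal of the #ifdef LOCKDEBUG around the spins counter in _kernel_lock(). A hedged sketch of the padding idiom for an ordinary byte lock (the line size and attribute spelling are assumptions for the sketch):

/*
 * Illustrative only: pad and align a hot byte-sized lock so it owns a
 * whole cache line and cannot false-share with its neighbours.
 */
#define EXAMPLE_CACHE_LINE	64	/* assumed line size for this sketch */

typedef unsigned char simple_spinlock_t;

simple_spinlock_t hot_lock[EXAMPLE_CACHE_LINE / sizeof(simple_spinlock_t)]
    __attribute__((aligned(EXAMPLE_CACHE_LINE)));

/*
 * Only hot_lock[0] is ever used as the lock; the remaining bytes exist
 * purely to keep other data off this cache line.  Callers pass
 * "hot_lock", which decays to &hot_lock[0].
 */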