Fix several problems with lockf/fcntl byte-range locks:

- Document a data structure invariant in lockf.h.
- Add a KASSERT() to check the invariant.
- Be more consistent about dequeuing ourselves from the blocked list
  after a tsleep().
- Fix two places where the invariant is violated.
- Correct a few comments here and there.
- If we're still following a lock dependency chain after maxlockdepth
  processes and haven't gotten back to the start, assume that we're in
  a cycle anyway and return EDEADLK (see the sketch below).

The fix is a superset of an existing fix in FreeBSD, but was derived
independently.

Fixes kern/3860.
sommerfeld 2000-06-12 14:33:04 +00:00
parent bde324b68f
commit c0011ad705
2 changed files with 55 additions and 15 deletions
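
As context for the EDEADLK change described above: the check follows the chain of "which lock owner is this process waiting on" links and gives up after maxlockdepth steps. Below is a standalone, illustrative sketch of that idea in plain C. It is not the kernel code: struct owner, o_waiting_on, would_deadlock() and the MAXLOCKDEPTH value are simplified stand-ins for the kernel's per-process bookkeeping.

/*
 * Illustrative sketch only (simplified types, not the kernel's): follow
 * the chain of blocking lock owners; if we get back to the requester,
 * or are still walking after MAXLOCKDEPTH steps, assume a cycle.
 */
#include <stddef.h>

#define MAXLOCKDEPTH	50	/* stand-in for the kernel's maxlockdepth */

struct owner {
	struct owner *o_waiting_on;	/* owner whose lock blocks us, or NULL */
};

/* Return nonzero if letting "self" wait on "blocker" would deadlock. */
static int
would_deadlock(const struct owner *self, const struct owner *blocker)
{
	const struct owner *o;
	int i;

	for (i = 0, o = blocker; o != NULL; i++, o = o->o_waiting_on) {
		if (o == self)
			return 1;	/* genuine cycle back to the requester */
		if (i >= MAXLOCKDEPTH)
			return 1;	/* too deep: assume a cycle, as the fix does */
	}
	return 0;	/* chain ended without reaching us: no deadlock */
}

Treating an over-long chain as a cycle trades a spurious EDEADLK in pathological-but-legal cases for guaranteed termination of the walk, which is the trade-off the commit message describes.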

vfs_lockf.c

@@ -1,4 +1,4 @@
-/* $NetBSD: vfs_lockf.c,v 1.15 2000/03/30 09:27:14 augustss Exp $ */
+/* $NetBSD: vfs_lockf.c,v 1.16 2000/06/12 14:33:06 sommerfeld Exp $ */
/*
* Copyright (c) 1982, 1986, 1989, 1993
@@ -62,6 +62,26 @@ int lockf_debug = 0;
#define SELF 0x1
#define OTHERS 0x2
+/*
+ * XXX TODO
+ * Misc cleanups: "caddr_t id" should be visible in the API as a
+ * "struct proc *".
+ * (This requires rototilling all VFS's which support advisory locking).
+ *
+ * Use pools for lock allocation.
+ */
+/*
+ * XXXSMP TODO: Using either (a) a global lock, or (b) the vnode's
+ * interlock should be sufficient; (b) requires a change to the API
+ * because the vnode isn't visible here.
+ *
+ * If there's a lot of lock contention on a single vnode, locking
+ * schemes which allow for more paralleism would be needed. Given how
+ * infrequently byte-range locks are actually used in typical BSD
+ * code, a more complex approach probably isn't worth it.
+ */
/*
* Do an advisory lock operation.
*/
@@ -196,7 +216,7 @@ lf_setlock(lock)
* Deadlock detection is done by looking through the
* wait channels to see if there are any cycles that
* involve us. MAXDEPTH is set just to make sure we
- * do not go off into neverland.
+ * do not go off into neverneverland.
*/
if ((lock->lf_flags & F_POSIX) &&
(block->lf_flags & F_POSIX)) {
@@ -220,6 +240,15 @@ lf_setlock(lock)
return (EDEADLK);
}
}
+/*
+ * If we're still following a dependancy chain
+ * after maxlockdepth iterations, assume we're in
+ * a cycle to be safe.
+ */
+if (i >= maxlockdepth) {
+	free(lock, M_LOCKF);
+	return (EDEADLK);
+}
}
/*
* For flock type locks, we must first remove
@@ -245,18 +274,20 @@ lf_setlock(lock)
}
#endif /* LOCKF_DEBUG */
error = tsleep((caddr_t)lock, priority, lockstr, 0);
+/*
+ * We may have been awakened by a signal (in
+ * which case we must remove ourselves from the
+ * blocked list) and/or by another process
+ * releasing a lock (in which case we have already
+ * been removed from the blocked list and our
+ * lf_next field set to NOLOCKF).
+ */
+if (lock->lf_next != NOLOCKF) {
+	TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
+	lock->lf_next = NOLOCKF;
+}
if (error) {
-/*
- * We may have been awakened by a signal (in
- * which case we must remove ourselves from the
- * blocked list) and/or by another process
- * releasing a lock (in which case we have already
- * been removed from the blocked list and our
- * lf_next field set to NOLOCKF).
- */
-if (lock->lf_next)
-	TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock,
-	    lf_block);
free(lock, M_LOCKF);
return (error);
}
@@ -334,8 +365,10 @@ lf_setlock(lock)
lf_wakelock(overlap);
} else {
while ((ltmp = overlap->lf_blkhd.tqh_first)) {
+KASSERT(ltmp->lf_next == overlap);
TAILQ_REMOVE(&overlap->lf_blkhd, ltmp,
lf_block);
+ltmp->lf_next = lock;
TAILQ_INSERT_TAIL(&lock->lf_blkhd,
ltmp, lf_block);
}
@@ -693,6 +726,7 @@ lf_wakelock(listhead)
struct lockf *wakelock;
while ((wakelock = listhead->lf_blkhd.tqh_first)) {
+KASSERT(wakelock->lf_next == listhead);
TAILQ_REMOVE(&listhead->lf_blkhd, wakelock, lf_block);
wakelock->lf_next = NOLOCKF;
#ifdef LOCKF_DEBUG
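
For context on the code path touched above: lf_setlock() is reached from userland byte-range lock requests made with fcntl(2), by way of the vnode's advisory-locking operation. The minimal example below shows such a request; the file name and byte range are arbitrary, and the error handling simply reports the EDEADLK case that the deadlock detection above produces.

/*
 * Minimal userland example of the operation lf_setlock() services.
 * The path and byte range here are arbitrary.  F_SETLKW sleeps until
 * the lock can be granted; if granting it would complete a cycle of
 * waiting processes, the call fails with EDEADLK.
 */
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct flock fl;
	int fd;

	fd = open("/tmp/example.lock", O_RDWR | O_CREAT, 0644);
	if (fd == -1)
		err(1, "open");

	memset(&fl, 0, sizeof(fl));
	fl.l_type = F_WRLCK;		/* exclusive lock ... */
	fl.l_whence = SEEK_SET;
	fl.l_start = 0;			/* ... on bytes 0..99 */
	fl.l_len = 100;

	if (fcntl(fd, F_SETLKW, &fl) == -1) {
		if (errno == EDEADLK)
			errx(1, "kernel detected a lock deadlock");
		err(1, "fcntl(F_SETLKW)");
	}

	/* ... operate on the locked range ... */

	fl.l_type = F_UNLCK;		/* release the range */
	(void)fcntl(fd, F_SETLK, &fl);
	close(fd);
	return 0;
}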

lockf.h

@@ -1,4 +1,4 @@
-/* $NetBSD: lockf.h,v 1.7 1998/03/01 02:24:13 fvdl Exp $ */
+/* $NetBSD: lockf.h,v 1.8 2000/06/12 14:33:04 sommerfeld Exp $ */
/*
* Copyright (c) 1991, 1993
@@ -46,6 +46,12 @@
* associated with a byte range lock. The lockf structures are linked into
* the inode structure. Locks are sorted by the starting byte of the lock for
* efficiency.
+ *
+ * lf_next is used for two purposes, depending on whether the lock is
+ * being held, or is in conflict with an existing lock. If this lock
+ * is held, it indicates the next lock on the same vnode.
+ * For pending locks, if lock->lf_next is non-NULL, then lock->lf_block
+ * must be queued on the lf_blkhd TAILQ of lock->lf_next.
*/
TAILQ_HEAD(locklist, lockf);
@@ -57,7 +63,7 @@ struct lockf {
off_t lf_end; /* The byte # of the end of the lock (-1=EOF)*/
caddr_t lf_id; /* The id of the resource holding the lock */
struct lockf **lf_head; /* Back pointer to the head of lockf list */
-struct lockf *lf_next;	/* A pointer to the next lock on this inode */
+struct lockf *lf_next;	/* Next lock on this vnode, or blocking lock */
struct locklist lf_blkhd; /* List of requests blocked on this lock */
TAILQ_ENTRY(lockf) lf_block;/* A request waiting for a lock */
};
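
The invariant documented in the comment above lends itself to a simple debug-time check. The helper below is a hypothetical sketch, not part of this commit: lf_assert_blocklist() is an invented name, and it presumes the headers already pulled in by vfs_lockf.c (KASSERT comes from <sys/systm.h>) as well as serialization of the lock list by the caller.

/*
 * Hypothetical debug helper (not part of this commit): walk every held
 * lock on the list and check that each request parked on its lf_blkhd
 * queue points back at it via lf_next, as documented above.
 */
#ifdef DIAGNOSTIC
static void
lf_assert_blocklist(struct lockf *head)
{
	struct lockf *lf, *blocked;

	for (lf = head; lf != NOLOCKF; lf = lf->lf_next) {
		for (blocked = lf->lf_blkhd.tqh_first;
		    blocked != NOLOCKF;
		    blocked = blocked->lf_block.tqe_next)
			KASSERT(blocked->lf_next == lf);
	}
}
#endif /* DIAGNOSTIC */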