Disallow descriptor-passing of descriptors which are open on directories which aren't under the recipient's root.

Clean up many error conditions involving descriptor passing, to
eliminate infinite loops, panics, premature garbage collection of
sockets, and descriptor leaks:
 - Avoid letting unp_gc() see descriptors with a refcount of zero by
removing them from the socket's queue before releasing them.
 - Avoid socket leak in PRU_ABORT (this will also gc descriptors queued
on a not-yet accepted socket when the accepting socket goes away).
 - Put in block comment explaining how unp_gc() should work.
 - Correctly manage unp_defer count so we don't get stuck in an infinite
loop with nothing to do.
 - Don't tie MARK and DEFER bits so closely together.
 - Mark descriptors queued on not-yet-accepted sockets as well.
 - Don't call sorflush on a non-socket; it doesn't work very well.
 - Deal with discard of NULL file pointer.
 - Hopefully cause GC to converge faster by only deferring sockets in
unp_mark().
This commit is contained in:
sommerfe 1999-03-22 17:54:38 +00:00
parent 2f0f84b8e8
commit 098b6f8e8a
2 changed files with 154 additions and 32 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: uipc_usrreq.c,v 1.38 1998/12/21 23:12:19 thorpej Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.39 1999/03/22 17:54:39 sommerfe Exp $ */
/*- /*-
* Copyright (c) 1998 The NetBSD Foundation, Inc. * Copyright (c) 1998 The NetBSD Foundation, Inc.
@ -339,6 +339,12 @@ uipc_usrreq(so, req, m, nam, control, p)
case PRU_ABORT: case PRU_ABORT:
unp_drop(unp, ECONNABORTED); unp_drop(unp, ECONNABORTED);
#ifdef DIAGNOSTIC
if (so->so_pcb == 0)
panic("uipc 5: drop killed pcb");
#endif
unp_detach(unp);
break; break;
case PRU_SENSE: case PRU_SENSE:
@ -798,21 +804,48 @@ unp_externalize(rights)
struct proc *p = curproc; /* XXX */ struct proc *p = curproc; /* XXX */
register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
register int i, *fdp = (int *)(cm + 1); register int i, *fdp = (int *)(cm + 1);
register struct file **rp = (struct file **)ALIGN(cm + 1); register struct file **rp;
register struct file *fp; register struct file *fp;
int nfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) / sizeof(struct file *); int nfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) / sizeof(struct file *);
int f; int f, error = 0;
/* Make sure the recipient should be able to see the descriptors.. */
if (p->p_fd->fd_rdir != NULL) {
rp = (struct file **)ALIGN(cm + 1);
for (i = 0; i < nfds; i++) {
fp = *rp++;
/*
* If we are in a chroot'ed directory, and
* someone wants to pass us a directory, make
* sure it's inside the subtree we're allowed
* to access.
*/
if (fp->f_type == DTYPE_VNODE) {
struct vnode *vp = (struct vnode *)fp->f_data;
if ((vp->v_type == VDIR) &&
!vn_isunder(vp, p->p_fd->fd_rdir, p)) {
error = EPERM;
break;
}
}
}
}
rp = (struct file **)ALIGN(cm + 1);
/* Make sure that the recipient has space */ /* Make sure that the recipient has space */
if (!fdavail(p, nfds)) { if (error || (!fdavail(p, nfds))) {
for (i = 0; i < nfds; i++) { for (i = 0; i < nfds; i++) {
fp = *rp; fp = *rp;
unp_discard(fp); /*
* zero the pointer before calling unp_discard,
* since it may end up in unp_gc()..
*/
*rp++ = 0; *rp++ = 0;
unp_discard(fp);
} }
return (EMSGSIZE); return (error ? error : EMSGSIZE);
} }
/* /*
* Add file to the recipient's open file table, converting them * Add file to the recipient's open file table, converting them
* to integer file descriptors as we go. Done in forward order * to integer file descriptors as we go. Done in forward order
@ -820,12 +853,13 @@ unp_externalize(rights)
* its corresponding struct file pointer. * its corresponding struct file pointer.
*/ */
for (i = 0; i < nfds; i++) { for (i = 0; i < nfds; i++) {
if (fdalloc(p, 0, &f))
panic("unp_externalize");
fp = *rp++; fp = *rp++;
p->p_fd->fd_ofiles[f] = fp;
fp->f_msgcount--; fp->f_msgcount--;
unp_rights--; unp_rights--;
if (fdalloc(p, 0, &f))
panic("unp_externalize");
p->p_fd->fd_ofiles[f] = fp;
*fdp++ = f; *fdp++ = f;
} }
@ -975,11 +1009,37 @@ unp_addsockcred(p, control)
int unp_defer, unp_gcing; int unp_defer, unp_gcing;
extern struct domain unixdomain; extern struct domain unixdomain;
/*
* Comment added long after the fact explaining what's going on here.
* Do a mark-sweep GC of file descriptors on the system, to free up
* any which are caught in flight to an about-to-be-closed socket.
*
* Traditional mark-sweep gc's start at the "root", and mark
* everything reachable from the root (which, in our case would be the
* process table). The mark bits are cleared during the sweep.
*
* XXX For some inexplicable reason (perhaps because the file
* descriptor tables used to live in the u area which could be swapped
* out and thus hard to reach), we do multiple scans over the set of
* descriptors, using *two* mark bits per object (DEFER and MARK).
* Whenever we find a descriptor which references other descriptors,
* the ones it references are marked with both bits, and we iterate
* over the whole file table until there are no more DEFER bits set.
* We also make an extra pass *before* the GC to clear the mark bits,
* which could have been cleared at almost no cost during the previous
* sweep.
*
* XXX MP: this needs to run with locks such that no other thread of
* control can create or destroy references to file descriptors. It
* may be necessary to defer the GC until later (when the locking
* situation is more hospitable); it may be necessary to push this
* into a separate thread.
*/
void void
unp_gc() unp_gc()
{ {
register struct file *fp, *nextfp; register struct file *fp, *nextfp;
register struct socket *so; register struct socket *so, *so1;
struct file **extra_ref, **fpp; struct file **extra_ref, **fpp;
int nunref, i; int nunref, i;
@ -987,22 +1047,35 @@ unp_gc()
return; return;
unp_gcing = 1; unp_gcing = 1;
unp_defer = 0; unp_defer = 0;
/* Clear mark bits */
for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
fp->f_flag &= ~(FMARK|FDEFER); fp->f_flag &= ~(FMARK|FDEFER);
/*
* Iterate over the set of descriptors, marking ones believed
* (based on refcount) to be referenced from a process, and
* marking for rescan descriptors which are queued on a socket.
*/
do { do {
for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) { for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
if (fp->f_count == 0)
continue;
if (fp->f_flag & FDEFER) { if (fp->f_flag & FDEFER) {
fp->f_flag &= ~FDEFER; fp->f_flag &= ~FDEFER;
unp_defer--; unp_defer--;
#ifdef DIAGNOSTIC
if (fp->f_count == 0)
panic("unp_gc: deferred unreferenced socket");
#endif
} else { } else {
if (fp->f_count == 0)
continue;
if (fp->f_flag & FMARK) if (fp->f_flag & FMARK)
continue; continue;
if (fp->f_count == fp->f_msgcount) if (fp->f_count == fp->f_msgcount)
continue; continue;
fp->f_flag |= FMARK;
} }
fp->f_flag |= FMARK;
if (fp->f_type != DTYPE_SOCKET || if (fp->f_type != DTYPE_SOCKET ||
(so = (struct socket *)fp->f_data) == 0) (so = (struct socket *)fp->f_data) == 0)
continue; continue;
@ -1025,10 +1098,28 @@ unp_gc()
goto restart; goto restart;
} }
#endif #endif
unp_scan(so->so_rcv.sb_mb, unp_mark); unp_scan(so->so_rcv.sb_mb, unp_mark, 0);
/*
* mark descriptors referenced from sockets queued on the accept queue as well.
*/
if (so->so_options & SO_ACCEPTCONN) {
for (so1 = so->so_q0.tqh_first;
so1 != 0;
so1 = so1->so_qe.tqe_next) {
unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
}
for (so1 = so->so_q.tqh_first;
so1 != 0;
so1 = so1->so_qe.tqe_next) {
unp_scan(so1->so_rcv.sb_mb, unp_mark, 0);
}
}
} }
} while (unp_defer); } while (unp_defer);
/* /*
* Sweep pass. Find unmarked descriptors, and free them.
*
* We grab an extra reference to each of the file table entries * We grab an extra reference to each of the file table entries
* that are not otherwise accessible and then free the rights * that are not otherwise accessible and then free the rights
* that are stored in messages on them. * that are stored in messages on them.
@ -1059,11 +1150,12 @@ unp_gc()
* SS_NOFDREF, and soclose panics at this point. * SS_NOFDREF, and soclose panics at this point.
* *
* Here, we first take an extra reference to each inaccessible * Here, we first take an extra reference to each inaccessible
* descriptor. Then, we call sorflush ourself, since we know * descriptor. Then, if the inaccessible descriptor is a
* it is a Unix domain socket anyhow. After we destroy all the * socket, we call sorflush in case it is a Unix domain
* rights carried in messages, we do a last closef to get rid * socket. After we destroy all the rights carried in
* of our extra reference. This is the last close, and the * messages, we do a last closef to get rid of our extra
* unp_detach etc will shut down the socket. * reference. This is the last close, and the unp_detach etc
* will shut down the socket.
* *
* 91/09/19, bsy@cs.cmu.edu * 91/09/19, bsy@cs.cmu.edu
*/ */
@ -1079,8 +1171,11 @@ unp_gc()
fp->f_count++; fp->f_count++;
} }
} }
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
sorflush((struct socket *)(*fpp)->f_data); fp = *fpp;
if (fp->f_type == DTYPE_SOCKET)
sorflush((struct socket *)fp->f_data);
}
for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
(void) closef(*fpp, (struct proc *)0); (void) closef(*fpp, (struct proc *)0);
free((caddr_t)extra_ref, M_FILE); free((caddr_t)extra_ref, M_FILE);
@ -1093,13 +1188,14 @@ unp_dispose(m)
{ {
if (m) if (m)
unp_scan(m, unp_discard); unp_scan(m, unp_discard, 1);
} }
void void
unp_scan(m0, op) unp_scan(m0, op, discard)
register struct mbuf *m0; register struct mbuf *m0;
void (*op) __P((struct file *)); void (*op) __P((struct file *));
int discard;
{ {
register struct mbuf *m; register struct mbuf *m;
register struct file **rp; register struct file **rp;
@ -1118,8 +1214,13 @@ unp_scan(m0, op)
qfds = (cm->cmsg_len - ALIGN(sizeof(*cm))) qfds = (cm->cmsg_len - ALIGN(sizeof(*cm)))
/ sizeof(struct file *); / sizeof(struct file *);
rp = (struct file **)(cm + 1); rp = (struct file **)(cm + 1);
for (i = 0; i < qfds; i++) for (i = 0; i < qfds; i++) {
(*op)(*rp++); struct file *fp = *rp;
if (discard)
*rp = 0;
(*op)(fp);
rp++;
}
break; /* XXX, but saves time */ break; /* XXX, but saves time */
} }
m0 = m0->m_act; m0 = m0->m_act;
@ -1130,18 +1231,39 @@ void
unp_mark(fp) unp_mark(fp)
struct file *fp; struct file *fp;
{ {
if (fp == NULL)
return;
if (fp->f_flag & FMARK) if (fp->f_flag & FMARK)
return; return;
unp_defer++;
fp->f_flag |= (FMARK|FDEFER); /* If we're already deferred, don't screw up the defer count */
if (fp->f_flag & FDEFER)
return;
/*
* Minimize the number of deferrals... Sockets are the only
* type of descriptor which can hold references to another
* descriptor, so just mark other descriptors, and defer
* unmarked sockets for the next pass.
*/
if (fp->f_type == DTYPE_SOCKET) {
unp_defer++;
if (fp->f_count == 0)
panic("unp_mark: queued unref");
fp->f_flag |= FDEFER;
} else {
fp->f_flag |= FMARK;
}
return;
} }
void void
unp_discard(fp) unp_discard(fp)
struct file *fp; struct file *fp;
{ {
if (fp == NULL)
return;
fp->f_msgcount--; fp->f_msgcount--;
unp_rights--; unp_rights--;
(void) closef(fp, (struct proc *)0); (void) closef(fp, (struct proc *)0);

View File

@ -1,4 +1,4 @@
/* $NetBSD: un.h,v 1.20 1999/03/10 12:58:00 kleink Exp $ */ /* $NetBSD: un.h,v 1.21 1999/03/22 17:54:38 sommerfe Exp $ */
/* /*
* Copyright (c) 1982, 1986, 1993 * Copyright (c) 1982, 1986, 1993
@ -66,7 +66,7 @@ void unp_disconnect __P((struct unpcb *unp));
void unp_drop __P((struct unpcb *unp, int errno)); void unp_drop __P((struct unpcb *unp, int errno));
void unp_gc __P((void)); void unp_gc __P((void));
void unp_mark __P((struct file *fp)); void unp_mark __P((struct file *fp));
void unp_scan __P((struct mbuf *m0, void (*op)(struct file *))); void unp_scan __P((struct mbuf *m0, void (*op)(struct file *), int));
void unp_shutdown __P((struct unpcb *unp)); void unp_shutdown __P((struct unpcb *unp));
int unp_externalize __P((struct mbuf *)); int unp_externalize __P((struct mbuf *));
int unp_internalize __P((struct mbuf *, struct proc *)); int unp_internalize __P((struct mbuf *, struct proc *));