From d991fcb3b6084e8abc162cd006cfe3ad32a3bbf3 Mon Sep 17 00:00:00 2001 From: ad Date: Sun, 24 May 2009 21:41:25 +0000 Subject: [PATCH] More changes to improve kern_descrip.c. - Avoid atomics in more places. - Remove the per-descriptor mutex, and just use filedesc_t::fd_lock. It was only being used to synchronize close, and in any case we needed to take fd_lock to free the descriptor slot. - Optimize certain paths for the fd_lastfile + 1)); +#if defined(__NetBSD_Version__) && __NetBSD__Version__ >= 599001200 if (KMCPY(o, fd->fd_ofiles, (fd->fd_lastfile + 1) * sizeof(*o)) == -1) { fprintf(stderr, "read(%#lx,%#lx,%lu) - u_ofile - failed\n", (u_long)fd->fd_ofiles, (u_long)o, (u_long)sizeof(*o)); goto finderror; } +#else + if (KMCPY(o, &fd->fd_dt->dt_ff, (fd->fd_lastfile + 1) * sizeof(*o)) == -1) + { + fprintf(stderr, "read(%#lx,%#lx,%lu) - u_ofile - failed\n", + (u_long)fd->fd_dt->dt_ff, (u_long)o, (u_long)sizeof(*o)); + goto finderror; + } +#endif f = (struct file *)calloc(1, sizeof(*f)); if (KMCPY(f, o[tfd], sizeof(*f)) == -1) { diff --git a/sys/compat/netbsd32/netbsd32_ioctl.c b/sys/compat/netbsd32/netbsd32_ioctl.c index 7e3be7dfecf8..0fe42c14d6fb 100644 --- a/sys/compat/netbsd32/netbsd32_ioctl.c +++ b/sys/compat/netbsd32/netbsd32_ioctl.c @@ -1,4 +1,4 @@ -/* $NetBSD: netbsd32_ioctl.c,v 1.41 2008/07/02 16:45:20 matt Exp $ */ +/* $NetBSD: netbsd32_ioctl.c,v 1.42 2009/05/24 21:41:25 ad Exp $ */ /* * Copyright (c) 1998, 2001 Matthew R. Green @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: netbsd32_ioctl.c,v 1.41 2008/07/02 16:45:20 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: netbsd32_ioctl.c,v 1.42 2009/05/24 21:41:25 ad Exp $"); #include #include @@ -349,7 +349,7 @@ printf("netbsd32_ioctl(%d, %x, %x): %s group %c base %d len %d\n", goto out; } - ff = fdp->fd_ofiles[SCARG(uap, fd)]; + ff = fdp->fd_dt->dt_ff[SCARG(uap, fd)]; switch (com = SCARG(uap, com)) { case FIOCLEX: ff->ff_exclose = true; diff --git a/sys/compat/svr4/svr4_filio.c b/sys/compat/svr4/svr4_filio.c index 4e2c5f3356d6..9ec0dbf2832d 100644 --- a/sys/compat/svr4/svr4_filio.c +++ b/sys/compat/svr4/svr4_filio.c @@ -1,4 +1,4 @@ -/* $NetBSD: svr4_filio.c,v 1.22 2008/07/02 16:45:20 matt Exp $ */ +/* $NetBSD: svr4_filio.c,v 1.23 2009/05/24 21:41:25 ad Exp $ */ /*- * Copyright (c) 1994, 2008 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: svr4_filio.c,v 1.22 2008/07/02 16:45:20 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: svr4_filio.c,v 1.23 2009/05/24 21:41:25 ad Exp $"); #include #include @@ -73,7 +73,7 @@ svr4_fil_ioctl(file_t *fp, struct lwp *l, register_t *retval, int fd, u_long cmd if ((fp = fd_getfile(fd)) == NULL) return EBADF; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; switch (cmd) { case SVR4_FIOCLEX: ff->ff_exclose = true; diff --git a/sys/compat/svr4_32/svr4_32_filio.c b/sys/compat/svr4_32/svr4_32_filio.c index 99364838b401..2b351879c343 100644 --- a/sys/compat/svr4_32/svr4_32_filio.c +++ b/sys/compat/svr4_32/svr4_32_filio.c @@ -1,7 +1,7 @@ -/* $NetBSD: svr4_32_filio.c,v 1.16 2008/07/02 16:45:20 matt Exp $ */ +/* $NetBSD: svr4_32_filio.c,v 1.17 2009/05/24 21:41:25 ad Exp $ */ /*- - * Copyright (c) 1994, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 1994, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: svr4_32_filio.c,v 1.16 2008/07/02 16:45:20 matt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: svr4_32_filio.c,v 1.17 2009/05/24 21:41:25 ad Exp $"); #include #include @@ -72,7 +72,7 @@ svr4_32_fil_ioctl(file_t *fp, struct lwp *l, register_t *retval, int fd, u_long if ((fp = fd_getfile(fd)) == NULL) return EBADF; fdp = curlwp->l_fd; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; error = 0; switch (cmd) { diff --git a/sys/ddb/db_xxx.c b/sys/ddb/db_xxx.c index 5dc983d8e2d9..9ae1427b438b 100644 --- a/sys/ddb/db_xxx.c +++ b/sys/ddb/db_xxx.c @@ -1,4 +1,4 @@ -/* $NetBSD: db_xxx.c,v 1.60 2009/03/21 13:06:39 ad Exp $ */ +/* $NetBSD: db_xxx.c,v 1.61 2009/05/24 21:41:25 ad Exp $ */ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 @@ -37,7 +37,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: db_xxx.c,v 1.60 2009/03/21 13:06:39 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: db_xxx.c,v 1.61 2009/05/24 21:41:25 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_kgdb.h" @@ -106,6 +106,7 @@ db_show_files_cmd(db_expr_t addr, bool haddr, file_t *fp; struct vnode *vn; bool full = false; + fdtab_t *dt; if (modif[0] == 'f') full = true; @@ -113,8 +114,9 @@ db_show_files_cmd(db_expr_t addr, bool haddr, p = (struct proc *) (uintptr_t) addr; fdp = p->p_fd; - for (i = 0; i < fdp->fd_nfiles; i++) { - if ((ff = fdp->fd_ofiles[i]) == NULL) + dt = fdp->fd_dt; + for (i = 0; i < dt->dt_nfiles; i++) { + if ((ff = dt->dt_ff[i]) == NULL) continue; fp = ff->ff_file; diff --git a/sys/kern/init_sysctl.c b/sys/kern/init_sysctl.c index fc966b0fe174..c503a6e89ecc 100644 --- a/sys/kern/init_sysctl.c +++ b/sys/kern/init_sysctl.c @@ -1,7 +1,7 @@ -/* $NetBSD: init_sysctl.c,v 1.163 2009/05/16 12:02:00 yamt Exp $ */ +/* $NetBSD: init_sysctl.c,v 1.164 2009/05/24 21:41:26 ad Exp $ */ /*- - * Copyright (c) 2003, 2007, 2008 The NetBSD Foundation, Inc. + * Copyright (c) 2003, 2007, 2008, 2009 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: init_sysctl.c,v 1.163 2009/05/16 12:02:00 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_sysctl.c,v 1.164 2009/05/24 21:41:26 ad Exp $"); #include "opt_sysv.h" #include "opt_compat_netbsd32.h" @@ -1970,6 +1970,7 @@ sysctl_kern_file2(SYSCTLFN_ARGS) size_t len, needed, elem_size, out_size; int error, arg, elem_count; fdfile_t *ff; + fdtab_t *dt; if (namelen == 1 && name[0] == CTL_QUERY) return (sysctl_query(SYSCTLFN_CALL(rnode))); @@ -2089,20 +2090,18 @@ sysctl_kern_file2(SYSCTLFN_ARGS) /* XXX Do we need to check permission per file? */ fd = p->p_fd; mutex_enter(&fd->fd_lock); - for (i = 0; i < fd->fd_nfiles; i++) { - if ((ff = fd->fd_ofiles[i]) == NULL) { + dt = fd->fd_dt; + for (i = 0; i < dt->dt_nfiles; i++) { + if ((ff = dt->dt_ff[i]) == NULL) { continue; } - mutex_enter(&ff->ff_lock); if ((fp = ff->ff_file) == NULL) { - mutex_exit(&ff->ff_lock); continue; } if (len >= elem_size && elem_count > 0) { mutex_enter(&fp->f_lock); fill_file(&kf, fp, ff, i, p->p_pid); mutex_exit(&fp->f_lock); - mutex_exit(&ff->ff_lock); mutex_exit(&fd->fd_lock); error = dcopyout(l, &kf, dp, out_size); mutex_enter(&fd->fd_lock); @@ -2110,8 +2109,6 @@ sysctl_kern_file2(SYSCTLFN_ARGS) break; dp += elem_size; len -= elem_size; - } else { - mutex_exit(&ff->ff_lock); } needed += elem_size; if (elem_count > 0 && elem_count != INT_MAX) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 8f5536380b5b..27fc33baa23d 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_descrip.c,v 1.191 2009/05/23 18:28:05 ad Exp $ */ +/* $NetBSD: kern_descrip.c,v 1.192 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.191 2009/05/23 18:28:05 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_descrip.c,v 1.192 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -144,6 +144,44 @@ fd_sys_init(void) KASSERT(filedesc_cache != NULL); } +static bool +fd_isused(filedesc_t *fdp, unsigned fd) +{ + u_int off = fd >> NDENTRYSHIFT; + + KASSERT(fd < fdp->fd_dt->dt_nfiles); + + return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; +} + +/* + * Verify that the bitmaps match the descriptor table. + */ +static inline void +fd_checkmaps(filedesc_t *fdp) +{ +#ifdef DEBUG + fdtab_t *dt; + u_int fd; + + dt = fdp->fd_dt; + + for (fd = 0; fd < dt->dt_nfiles; fd++) { + if (fd < NDFDFILE) { + KASSERT(dt->dt_ff[fd] == + (fdfile_t *)fdp->fd_dfdfile[fd]); + } + if (dt->dt_ff[fd] == NULL) { + KASSERT(!fd_isused(fdp, fd)); + } else if (dt->dt_ff[fd]->ff_file != NULL) { + KASSERT(fd_isused(fdp, fd)); + } + } +#else /* DEBUG */ + /* nothing */ +#endif /* DEBUG */ +} + static int fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) { @@ -152,6 +190,8 @@ fd_next_zero(filedesc_t *fdp, uint32_t *bitmap, int want, u_int bits) KASSERT(mutex_owned(&fdp->fd_lock)); + fd_checkmaps(fdp); + if (want > bits) return -1; @@ -181,11 +221,13 @@ static int fd_last_set(filedesc_t *fd, int last) { int off, i; - fdfile_t **ofiles = fd->fd_ofiles; + fdfile_t **ff = fd->fd_dt->dt_ff; uint32_t *bitmap = fd->fd_lomap; KASSERT(mutex_owned(&fd->fd_lock)); + fd_checkmaps(fd); + off = (last - 1) >> NDENTRYSHIFT; while (off >= 0 && !bitmap[off]) @@ -199,20 +241,19 @@ fd_last_set(filedesc_t *fd, int last) i = last - 1; /* XXX should use bitmap */ - /* XXXAD does not work for fd_copy() */ - while (i > 0 && (ofiles[i] == NULL || !ofiles[i]->ff_allocated)) + while (i > 0 && (ff[i] == NULL || !ff[i]->ff_allocated)) i--; return (i); } -void +static inline void fd_used(filedesc_t *fdp, unsigned fd) { u_int off = fd >> NDENTRYSHIFT; fdfile_t *ff; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; KASSERT(mutex_owned(&fdp->fd_lock)); KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) == 0); @@ -222,7 +263,7 @@ fd_used(filedesc_t *fdp, unsigned fd) ff->ff_allocated = 1; fdp->fd_lomap[off] |= 1 << (fd & NDENTRYMASK); - if (fdp->fd_lomap[off] == ~0) { + if (__predict_false(fdp->fd_lomap[off] == ~0)) { KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & (1 << (off & NDENTRYMASK))) == 0); fdp->fd_himap[off >> NDENTRYSHIFT] |= 1 << (off & NDENTRYMASK); @@ -232,20 +273,16 @@ fd_used(filedesc_t *fdp, unsigned fd) fdp->fd_lastfile = fd; } - if (fd >= NDFDFILE) { - fdp->fd_nused++; - } else { - KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); - } + fd_checkmaps(fdp); } -void +static inline void fd_unused(filedesc_t *fdp, unsigned fd) { u_int off = fd >> NDENTRYSHIFT; fdfile_t *ff; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; /* * Don't assert the lock is held here, as we may be copying @@ -276,46 +313,7 @@ fd_unused(filedesc_t *fdp, unsigned fd) if (fd == fdp->fd_lastfile) { fdp->fd_lastfile = fd_last_set(fdp, fd); } - - if (fd >= NDFDFILE) { - KASSERT(fdp->fd_nused > 0); - fdp->fd_nused--; - } else { - KASSERT(ff == (fdfile_t *)fdp->fd_dfdfile[fd]); - } -} - -/* - * Custom version of fd_unused() for fd_copy(), where the descriptor - * table is not yet fully initialized. - */ -static inline void -fd_zap(filedesc_t *fdp, unsigned fd) -{ - u_int off = fd >> NDENTRYSHIFT; - - if (fd < fdp->fd_freefile) { - fdp->fd_freefile = fd; - } - - if (fdp->fd_lomap[off] == ~0) { - KASSERT((fdp->fd_himap[off >> NDENTRYSHIFT] & - (1 << (off & NDENTRYMASK))) != 0); - fdp->fd_himap[off >> NDENTRYSHIFT] &= - ~(1 << (off & NDENTRYMASK)); - } - KASSERT((fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0); - fdp->fd_lomap[off] &= ~(1 << (fd & NDENTRYMASK)); -} - -bool -fd_isused(filedesc_t *fdp, unsigned fd) -{ - u_int off = fd >> NDENTRYSHIFT; - - KASSERT(fd < fdp->fd_nfiles); - - return (fdp->fd_lomap[off] & (1 << (fd & NDENTRYMASK))) != 0; + fd_checkmaps(fdp); } /* @@ -328,19 +326,18 @@ fd_getfile(unsigned fd) filedesc_t *fdp; fdfile_t *ff; file_t *fp; - - fdp = curlwp->l_fd; + fdtab_t *dt; /* * Look up the fdfile structure representing this descriptor. - * Ensure that we see fd_nfiles before fd_ofiles since we - * are doing this unlocked. See fd_tryexpand(). + * We are doing this unlocked. See fd_tryexpand(). */ - if (__predict_false(fd >= fdp->fd_nfiles)) { + fdp = curlwp->l_fd; + dt = fdp->fd_dt; + if (__predict_false(fd >= dt->dt_nfiles)) { return NULL; } - membar_consumer(); - ff = fdp->fd_ofiles[fd]; + ff = dt->dt_ff[fd]; KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); if (__predict_false(ff == NULL)) { return NULL; @@ -356,9 +353,9 @@ fd_getfile(unsigned fd) ff->ff_refcnt++; } else { /* - * Issue a memory barrier to ensure that we acquire the file - * pointer _after_ adding a reference. If no memory - * barrier, we could fetch a stale pointer. + * Multi threaded: issue a memory barrier to ensure that we + * acquire the file pointer _after_ adding a reference. If + * no memory barrier, we could fetch a stale pointer. */ atomic_inc_uint(&ff->ff_refcnt); #ifndef __HAVE_ATOMIC_AS_MEMBAR @@ -389,9 +386,9 @@ fd_putfile(unsigned fd) u_int u, v; fdp = curlwp->l_fd; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; - KASSERT(fd < fdp->fd_nfiles); + KASSERT(fd < fdp->fd_dt->dt_nfiles); KASSERT(ff != NULL); KASSERT((ff->ff_refcnt & FR_MASK) > 0); KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); @@ -506,27 +503,26 @@ fd_getfile2(proc_t *p, unsigned fd) filedesc_t *fdp; fdfile_t *ff; file_t *fp; + fdtab_t *dt; fdp = p->p_fd; mutex_enter(&fdp->fd_lock); - if (fd > fdp->fd_nfiles) { + dt = fdp->fd_dt; + if (fd >= dt->dt_nfiles) { mutex_exit(&fdp->fd_lock); return NULL; } - if ((ff = fdp->fd_ofiles[fd]) == NULL) { + if ((ff = dt->dt_ff[fd]) == NULL) { mutex_exit(&fdp->fd_lock); return NULL; } - mutex_enter(&ff->ff_lock); if ((fp = ff->ff_file) == NULL) { - mutex_exit(&ff->ff_lock); mutex_exit(&fdp->fd_lock); return NULL; } mutex_enter(&fp->f_lock); fp->f_count++; mutex_exit(&fp->f_lock); - mutex_exit(&ff->ff_lock); mutex_exit(&fdp->fd_lock); return fp; @@ -547,17 +543,18 @@ fd_close(unsigned fd) file_t *fp; proc_t *p; lwp_t *l; + u_int refcnt; l = curlwp; p = l->l_proc; fdp = l->l_fd; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); - mutex_enter(&ff->ff_lock); + mutex_enter(&fdp->fd_lock); KASSERT((ff->ff_refcnt & FR_MASK) > 0); - if (ff->ff_file == NULL) { + if (__predict_false(ff->ff_file == NULL)) { /* * Another user of the file is already closing, and is * waiting for other users of the file to drain. Release @@ -565,7 +562,7 @@ fd_close(unsigned fd) */ atomic_dec_uint(&ff->ff_refcnt); cv_broadcast(&ff->ff_closing); - mutex_exit(&ff->ff_lock); + mutex_exit(&fdp->fd_lock); /* * An application error, so pretend that the descriptor @@ -593,10 +590,17 @@ fd_close(unsigned fd) * fd_getfile() will notice that the file is being closed and drop * the reference again. */ + if (fdp->fd_refcnt == 1) { + /* Single threaded. */ + refcnt = --(ff->ff_refcnt); + } else { + /* Multi threaded. */ #ifndef __HAVE_ATOMIC_AS_MEMBAR - membar_producer(); + membar_producer(); #endif - if (__predict_false(atomic_dec_uint_nv(&ff->ff_refcnt) != 0)) { + refcnt = atomic_dec_uint_nv(&ff->ff_refcnt); + } + if (__predict_false(refcnt != 0)) { /* * Wait for other references to drain. This is typically * an application error - the descriptor is being closed @@ -609,14 +613,14 @@ fd_close(unsigned fd) * Remove any knotes attached to the file. A knote * attached to the descriptor can hold references on it. */ - mutex_exit(&ff->ff_lock); + mutex_exit(&fdp->fd_lock); if (!SLIST_EMPTY(&ff->ff_knlist)) { knote_fdclose(fd); } /* Try to drain out descriptor references. */ (*fp->f_ops->fo_drain)(fp); - mutex_enter(&ff->ff_lock); + mutex_enter(&fdp->fd_lock); /* * We need to see the count drop to zero at least once, @@ -625,14 +629,13 @@ fd_close(unsigned fd) * of no interest. */ while ((ff->ff_refcnt & FR_MASK) != 0) { - cv_wait(&ff->ff_closing, &ff->ff_lock); + cv_wait(&ff->ff_closing, &fdp->fd_lock); } atomic_and_uint(&ff->ff_refcnt, ~FR_CLOSING); } else { /* If no references, there must be no knotes. */ KASSERT(SLIST_EMPTY(&ff->ff_knlist)); } - mutex_exit(&ff->ff_lock); /* * POSIX record locking dictates that any close releases ALL @@ -642,17 +645,18 @@ fd_close(unsigned fd) * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. */ - if ((p->p_flag & PK_ADVLOCK) != 0 && fp->f_type == DTYPE_VNODE) { + if (__predict_false((p->p_flag & PK_ADVLOCK) != 0 && + fp->f_type == DTYPE_VNODE)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; lf.l_type = F_UNLCK; + mutex_exit(&fdp->fd_lock); (void)VOP_ADVLOCK(fp->f_data, p, F_UNLCK, &lf, F_POSIX); + mutex_enter(&fdp->fd_lock); } - /* Free descriptor slot. */ - mutex_enter(&fdp->fd_lock); fd_unused(fdp, fd); mutex_exit(&fdp->fd_lock); @@ -678,7 +682,7 @@ fd_dup(file_t *fp, int minfd, int *newp, bool exclose) fd_tryexpand(p); } - curlwp->l_fd->fd_ofiles[*newp]->ff_exclose = exclose; + curlwp->l_fd->fd_dt->dt_ff[*newp]->ff_exclose = exclose; fd_affix(p, fp, *newp); return 0; } @@ -691,6 +695,7 @@ fd_dup2(file_t *fp, unsigned new) { filedesc_t *fdp; fdfile_t *ff; + fdtab_t *dt; fdp = curlwp->l_fd; @@ -698,7 +703,7 @@ fd_dup2(file_t *fp, unsigned new) * Ensure there are enough slots in the descriptor table, * and allocate an fdfile_t up front in case we need it. */ - while (new >= fdp->fd_nfiles) { + while (new >= fdp->fd_dt->dt_nfiles) { fd_tryexpand(curproc); } ff = pool_cache_get(fdfile_cache, PR_WAITOK); @@ -714,14 +719,19 @@ fd_dup2(file_t *fp, unsigned new) if (fd_getfile(new) != NULL) { (void)fd_close(new); } else { - /* XXX Crummy, but unlikely to happen. */ + /* + * Crummy, but unlikely to happen. + * Can occur if we interrupt another + * thread while it is opening a file. + */ kpause("dup2", false, 1, NULL); } mutex_enter(&fdp->fd_lock); } - if (fdp->fd_ofiles[new] == NULL) { + dt = fdp->fd_dt; + if (dt->dt_ff[new] == NULL) { KASSERT(new >= NDFDFILE); - fdp->fd_ofiles[new] = ff; + dt->dt_ff[new] = ff; ff = NULL; } fd_used(fdp, new); @@ -786,6 +796,7 @@ fd_alloc(proc_t *p, int want, int *result) filedesc_t *fdp; int i, lim, last, error; u_int off, new; + fdtab_t *dt; KASSERT(p == curproc || p == &proc0); @@ -796,9 +807,11 @@ fd_alloc(proc_t *p, int want, int *result) * of want or fd_freefile. */ mutex_enter(&fdp->fd_lock); - KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); + fd_checkmaps(fdp); + dt = fdp->fd_dt; + KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); - last = min(fdp->fd_nfiles, lim); + last = min(dt->dt_nfiles, lim); for (;;) { if ((i = want) < fdp->fd_freefile) i = fdp->fd_freefile; @@ -821,61 +834,71 @@ fd_alloc(proc_t *p, int want, int *result) if (i >= last) { break; } - if (fdp->fd_ofiles[i] == NULL) { + if (dt->dt_ff[i] == NULL) { KASSERT(i >= NDFDFILE); - fdp->fd_ofiles[i] = - pool_cache_get(fdfile_cache, PR_WAITOK); + dt->dt_ff[i] = pool_cache_get(fdfile_cache, PR_WAITOK); } - KASSERT(fdp->fd_ofiles[i]->ff_refcnt == 0); - KASSERT(fdp->fd_ofiles[i]->ff_file == NULL); + KASSERT(dt->dt_ff[i]->ff_refcnt == 0); + KASSERT(dt->dt_ff[i]->ff_file == NULL); fd_used(fdp, i); if (want <= fdp->fd_freefile) { fdp->fd_freefile = i; } *result = i; - mutex_exit(&fdp->fd_lock); KASSERT(i >= NDFDFILE || - fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]); + dt->dt_ff[i] == (fdfile_t *)fdp->fd_dfdfile[i]); + fd_checkmaps(fdp); + mutex_exit(&fdp->fd_lock); return 0; } /* No space in current array. Let the caller expand and retry. */ - error = (fdp->fd_nfiles >= lim) ? EMFILE : ENOSPC; + error = (dt->dt_nfiles >= lim) ? EMFILE : ENOSPC; mutex_exit(&fdp->fd_lock); return error; } /* - * Allocate memory for the open files array. + * Allocate memory for a descriptor table. */ -static fdfile_t ** -fd_ofile_alloc(int n) +static fdtab_t * +fd_dtab_alloc(int n) { - uintptr_t *ptr, sz; + fdtab_t *dt; + size_t sz; KASSERT(n > NDFILE); - sz = (n + 2) * sizeof(uintptr_t); - ptr = kmem_alloc((size_t)sz, KM_SLEEP); - ptr[1] = sz; - - return (fdfile_t **)(ptr + 2); + sz = sizeof(*dt) + (n - NDFILE) * sizeof(dt->dt_ff[0]); + dt = kmem_alloc(sz, KM_SLEEP); +#ifdef DIAGNOSTIC + memset(dt, 0xff, sz); +#endif + dt->dt_nfiles = n; + dt->dt_link = NULL; + return dt; } /* - * Free an open files array. + * Free a descriptor table, and all tables linked for deferred free. */ static void -fd_ofile_free(int n, fdfile_t **of) +fd_dtab_free(fdtab_t *dt) { - uintptr_t *ptr, sz; + fdtab_t *next; + size_t sz; - KASSERT(n > NDFILE); - - sz = (n + 2) * sizeof(uintptr_t); - ptr = (uintptr_t *)of - 2; - KASSERT(ptr[1] == sz); - kmem_free(ptr, sz); + do { + next = dt->dt_link; + KASSERT(dt->dt_nfiles > NDFILE); + sz = sizeof(*dt) + + (dt->dt_nfiles - NDFILE) * sizeof(dt->dt_ff[0]); +#ifdef DIAGNOSTIC + memset(dt, 0xff, sz); +#endif + kmem_free(dt, sz); + dt = next; + } while (dt != NULL); } /* @@ -920,7 +943,7 @@ fd_tryexpand(proc_t *p) { filedesc_t *fdp; int i, numfiles, oldnfiles; - fdfile_t **newofile; + fdtab_t *newdt, *dt; uint32_t *newhimap, *newlomap; KASSERT(p == curproc || p == &proc0); @@ -928,46 +951,47 @@ fd_tryexpand(proc_t *p) fdp = p->p_fd; newhimap = NULL; newlomap = NULL; - oldnfiles = fdp->fd_nfiles; + oldnfiles = fdp->fd_dt->dt_nfiles; if (oldnfiles < NDEXTENT) numfiles = NDEXTENT; else numfiles = 2 * oldnfiles; - newofile = fd_ofile_alloc(numfiles); + newdt = fd_dtab_alloc(numfiles); if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { fd_map_alloc(numfiles, &newlomap, &newhimap); } mutex_enter(&fdp->fd_lock); - KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); - if (fdp->fd_nfiles != oldnfiles) { + dt = fdp->fd_dt; + KASSERT(dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); + if (dt->dt_nfiles != oldnfiles) { /* fdp changed; caller must retry */ mutex_exit(&fdp->fd_lock); - fd_ofile_free(numfiles, newofile); + fd_dtab_free(newdt); if (NDHISLOTS(numfiles) > NDHISLOTS(oldnfiles)) { fd_map_free(numfiles, newlomap, newhimap); } return; } - /* Copy the existing ofile array and zero the new portion. */ - i = sizeof(fdfile_t *) * fdp->fd_nfiles; - memcpy(newofile, fdp->fd_ofiles, i); - memset((uint8_t *)newofile + i, 0, numfiles * sizeof(fdfile_t *) - i); + /* Copy the existing descriptor table and zero the new portion. */ + i = sizeof(fdfile_t *) * oldnfiles; + memcpy(newdt->dt_ff, dt->dt_ff, i); + memset((uint8_t *)newdt->dt_ff + i, 0, numfiles * sizeof(fdfile_t *) - i); /* - * Link old ofiles array into list to be discarded. We defer - * freeing until process exit if the descriptor table is visble - * to other threads. + * Link old descriptor array into list to be discarded. We defer + * freeing until the last reference to the descriptor table goes + * away (usually process exit). This allows us to do lockless + * lookups in fd_getfile(). */ if (oldnfiles > NDFILE) { if (fdp->fd_refcnt > 1) { - fdp->fd_ofiles[-2] = (void *)fdp->fd_discard; - fdp->fd_discard = fdp->fd_ofiles - 2; + newdt->dt_link = dt; } else { - fd_ofile_free(oldnfiles, fdp->fd_ofiles); + fd_dtab_free(dt); } } @@ -991,14 +1015,13 @@ fd_tryexpand(proc_t *p) /* * All other modifications must become globally visible before - * the change to fd_nfiles. See fd_getfile(). + * the change to fd_dt. See fd_getfile(). */ - fdp->fd_ofiles = newofile; membar_producer(); - fdp->fd_nfiles = numfiles; + fdp->fd_dt = newdt; + KASSERT(newdt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); + fd_checkmaps(fdp); mutex_exit(&fdp->fd_lock); - - KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); } /* @@ -1080,14 +1103,13 @@ fd_affix(proc_t *p, file_t *fp, unsigned fd) * current process. */ fdp = p->p_fd; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; KASSERT(ff != NULL); KASSERT(ff->ff_file == NULL); KASSERT(ff->ff_allocated); KASSERT(fd_isused(fdp, fd)); - KASSERT(fd >= NDFDFILE || - fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); + KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); /* No need to lock in order to make file initially visible. */ ff->ff_file = fp; @@ -1105,10 +1127,9 @@ fd_abort(proc_t *p, file_t *fp, unsigned fd) KASSERT(p == curproc || p == &proc0); fdp = p->p_fd; - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; - KASSERT(fd >= NDFDFILE || - fdp->fd_ofiles[fd] == (fdfile_t *)fdp->fd_dfdfile[fd]); + KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); mutex_enter(&fdp->fd_lock); KASSERT(fd_isused(fdp, fd)); @@ -1165,7 +1186,6 @@ fdfile_ctor(void *arg, void *obj, int flags) fdfile_t *ff = obj; memset(ff, 0, sizeof(*ff)); - mutex_init(&ff->ff_lock, MUTEX_DEFAULT, IPL_NONE); cv_init(&ff->ff_closing, "fdclose"); return 0; @@ -1176,7 +1196,6 @@ fdfile_dtor(void *arg, void *obj) { fdfile_t *ff = obj; - mutex_destroy(&ff->ff_lock); cv_destroy(&ff->ff_closing); } @@ -1207,28 +1226,37 @@ fputdummy(file_t *fp) filedesc_t * fd_init(filedesc_t *fdp) { +#ifdef DIAGNOSTIC unsigned fd; +#endif - if (fdp == NULL) { + if (__predict_true(fdp == NULL)) { fdp = pool_cache_get(filedesc_cache, PR_WAITOK); } else { + /* XXXRUMP KASSERT(fdp == &filedesc0); */ filedesc_ctor(NULL, fdp, PR_WAITOK); } - fdp->fd_refcnt = 1; - fdp->fd_ofiles = fdp->fd_dfiles; - fdp->fd_nfiles = NDFILE; - fdp->fd_himap = fdp->fd_dhimap; - fdp->fd_lomap = fdp->fd_dlomap; +#ifdef DIAGNOSTIC KASSERT(fdp->fd_lastfile == -1); KASSERT(fdp->fd_lastkqfile == -1); KASSERT(fdp->fd_knhash == NULL); - - memset(&fdp->fd_startzero, 0, sizeof(*fdp) - - offsetof(filedesc_t, fd_startzero)); + KASSERT(fdp->fd_freefile == 0); + KASSERT(fdp->fd_exclose == false); + KASSERT(fdp->fd_dt == &fdp->fd_dtbuiltin); + KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); for (fd = 0; fd < NDFDFILE; fd++) { - fdp->fd_ofiles[fd] = (fdfile_t *)fdp->fd_dfdfile[fd]; + KASSERT(fdp->fd_dtbuiltin.dt_ff[fd] == + (fdfile_t *)fdp->fd_dfdfile[fd]); } + for (fd = NDFDFILE; fd < NDFILE; fd++) { + KASSERT(fdp->fd_dtbuiltin.dt_ff[fd] == NULL); + } +#endif /* DIAGNOSTIC */ + + fdp->fd_refcnt = 1; + fdp->fd_himap = fdp->fd_dhimap; + fdp->fd_lomap = fdp->fd_dlomap; return fdp; } @@ -1240,16 +1268,20 @@ static int filedesc_ctor(void *arg, void *obj, int flag) { filedesc_t *fdp = obj; + fdfile_t **ffp; int i; memset(fdp, 0, sizeof(*fdp)); mutex_init(&fdp->fd_lock, MUTEX_DEFAULT, IPL_NONE); fdp->fd_lastfile = -1; fdp->fd_lastkqfile = -1; + fdp->fd_dt = &fdp->fd_dtbuiltin; + fdp->fd_dtbuiltin.dt_nfiles = NDFILE; CTASSERT(sizeof(fdp->fd_dfdfile[0]) >= sizeof(fdfile_t)); - for (i = 0; i < NDFDFILE; i++) { - fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); + for (i = 0, ffp = fdp->fd_dt->dt_ff; i < NDFDFILE; i++, ffp++) { + *ffp = (fdfile_t *)fdp->fd_dfdfile[i]; + (void)fdfile_ctor(NULL, fdp->fd_dfdfile[i], PR_WAITOK); } return 0; @@ -1298,168 +1330,127 @@ filedesc_t * fd_copy(void) { filedesc_t *newfdp, *fdp; - fdfile_t *ff, *fflist, **ffp, **nffp, *ff2; - int i, nused, numfiles, lastfile, j, newlast; + fdfile_t *ff, **ffp, **nffp, *ff2; + int i, j, numfiles, lastfile, newlast; file_t *fp; + fdtab_t *newdt; fdp = curproc->p_fd; newfdp = pool_cache_get(filedesc_cache, PR_WAITOK); newfdp->fd_refcnt = 1; +#ifdef DIAGNOSTIC + KASSERT(newfdp->fd_lastfile == -1); + KASSERT(newfdp->fd_lastkqfile == -1); KASSERT(newfdp->fd_knhash == NULL); - KASSERT(newfdp->fd_knhashmask == 0); - KASSERT(newfdp->fd_discard == NULL); - - for (;;) { - numfiles = fdp->fd_nfiles; - lastfile = fdp->fd_lastfile; - - /* - * If the number of open files fits in the internal arrays - * of the open file structure, use them, otherwise allocate - * additional memory for the number of descriptors currently - * in use. - */ - if (lastfile < NDFILE) { - i = NDFILE; - newfdp->fd_ofiles = newfdp->fd_dfiles; - } else { - /* - * Compute the smallest multiple of NDEXTENT needed - * for the file descriptors currently in use, - * allowing the table to shrink. - */ - i = numfiles; - while (i >= 2 * NDEXTENT && i > lastfile * 2) { - i /= 2; - } - newfdp->fd_ofiles = fd_ofile_alloc(i); - KASSERT(i > NDFILE); - } - if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { - newfdp->fd_himap = newfdp->fd_dhimap; - newfdp->fd_lomap = newfdp->fd_dlomap; - } else { - fd_map_alloc(i, &newfdp->fd_lomap, - &newfdp->fd_himap); - } - - /* - * Allocate and string together fdfile structures. - * We abuse fdfile_t::ff_file here, but it will be - * cleared before this routine returns. - */ - nused = fdp->fd_nused; - fflist = NULL; - for (j = nused; j != 0; j--) { - ff = pool_cache_get(fdfile_cache, PR_WAITOK); - ff->ff_file = (void *)fflist; - fflist = ff; - } - - mutex_enter(&fdp->fd_lock); - if (numfiles == fdp->fd_nfiles && nused == fdp->fd_nused && - lastfile == fdp->fd_lastfile) { - break; - } - mutex_exit(&fdp->fd_lock); - if (i > NDFILE) { - fd_ofile_free(i, newfdp->fd_ofiles); - } - if (NDHISLOTS(i) > NDHISLOTS(NDFILE)) { - fd_map_free(i, newfdp->fd_lomap, newfdp->fd_himap); - } - while (fflist != NULL) { - ff = fflist; - fflist = (void *)ff->ff_file; - ff->ff_file = NULL; - pool_cache_put(fdfile_cache, ff); - } + KASSERT(newfdp->fd_freefile == 0); + KASSERT(newfdp->fd_exclose == false); + KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); + KASSERT(newfdp->fd_dtbuiltin.dt_nfiles == NDFILE); + for (i = 0; i < NDFDFILE; i++) { + KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == + (fdfile_t *)&newfdp->fd_dfdfile[i]); } + for (i = NDFDFILE; i < NDFILE; i++) { + KASSERT(newfdp->fd_dtbuiltin.dt_ff[i] == NULL); + } +#endif /* DIAGNOSTIC */ - newfdp->fd_nfiles = i; + mutex_enter(&fdp->fd_lock); + fd_checkmaps(fdp); + numfiles = fdp->fd_dt->dt_nfiles; + lastfile = fdp->fd_lastfile; + + /* + * If the number of open files fits in the internal arrays + * of the open file structure, use them, otherwise allocate + * additional memory for the number of descriptors currently + * in use. + */ + if (lastfile < NDFILE) { + i = NDFILE; + newdt = newfdp->fd_dt; + KASSERT(newfdp->fd_dt == &newfdp->fd_dtbuiltin); + } else { + /* + * Compute the smallest multiple of NDEXTENT needed + * for the file descriptors currently in use, + * allowing the table to shrink. + */ + i = numfiles; + while (i >= 2 * NDEXTENT && i > lastfile * 2) { + i /= 2; + } + KASSERT(i > NDFILE); + newdt = fd_dtab_alloc(i); + newfdp->fd_dt = newdt; + memcpy(newdt->dt_ff, newfdp->fd_dtbuiltin.dt_ff, + NDFDFILE * sizeof(fdfile_t **)); + memset(newdt->dt_ff + NDFDFILE, 0, + (i - NDFDFILE) * sizeof(fdfile_t **)); + } + if (NDHISLOTS(i) <= NDHISLOTS(NDFILE)) { + newfdp->fd_himap = newfdp->fd_dhimap; + newfdp->fd_lomap = newfdp->fd_dlomap; + } else { + fd_map_alloc(i, &newfdp->fd_lomap, &newfdp->fd_himap); + KASSERT(i >= NDENTRIES * NDENTRIES); + memset(newfdp->fd_himap, 0, NDHISLOTS(i)*sizeof(uint32_t)); + memset(newfdp->fd_lomap, 0, NDLOSLOTS(i)*sizeof(uint32_t)); + } newfdp->fd_freefile = fdp->fd_freefile; newfdp->fd_exclose = fdp->fd_exclose; - /* - * Clear the entries that will not be copied over. - * Avoid calling memset with 0 size. - */ - if (lastfile < (i-1)) { - memset(newfdp->fd_ofiles + lastfile + 1, 0, - (i - lastfile - 1) * sizeof(file_t **)); - } - if (i < NDENTRIES * NDENTRIES) { - i = NDENTRIES * NDENTRIES; /* size of inlined bitmaps */ - } - memcpy(newfdp->fd_himap, fdp->fd_himap, NDHISLOTS(i)*sizeof(uint32_t)); - memcpy(newfdp->fd_lomap, fdp->fd_lomap, NDLOSLOTS(i)*sizeof(uint32_t)); - - ffp = fdp->fd_ofiles; - nffp = newfdp->fd_ofiles; - j = imax(lastfile, (NDFDFILE - 1)); + ffp = fdp->fd_dt->dt_ff; + nffp = newdt->dt_ff; newlast = -1; - KASSERT(j < fdp->fd_nfiles); - for (i = 0; i <= j; i++, ffp++, *nffp++ = ff2) { + for (i = 0; i <= (int)lastfile; i++, ffp++, nffp++) { + KASSERT(i >= NDFDFILE || + *nffp == (fdfile_t *)newfdp->fd_dfdfile[i]); ff = *ffp; - /* Install built-in fdfiles even if unused here. */ - if (i < NDFDFILE) { - ff2 = (fdfile_t *)newfdp->fd_dfdfile[i]; - } else { - ff2 = NULL; - } - /* Determine if descriptor is active in parent. */ - if (ff == NULL || !fd_isused(fdp, i)) { - KASSERT(ff != NULL || i >= NDFDFILE); + if (ff == NULL || (fp = ff->ff_file) == NULL) { + /* Descriptor unused, or descriptor half open. */ + KASSERT(!fd_isused(fdp, i)); + KASSERT(!fd_isused(newfdp, i)); continue; } - mutex_enter(&ff->ff_lock); - fp = ff->ff_file; - if (fp == NULL) { - /* Descriptor is half-open: free slot. */ - fd_zap(newfdp, i); - mutex_exit(&ff->ff_lock); - continue; - } - if (fp->f_type == DTYPE_KQUEUE) { + if (__predict_false(fp->f_type == DTYPE_KQUEUE)) { /* kqueue descriptors cannot be copied. */ - fd_zap(newfdp, i); - mutex_exit(&ff->ff_lock); continue; } /* It's active: add a reference to the file. */ mutex_enter(&fp->f_lock); fp->f_count++; mutex_exit(&fp->f_lock); - /* Consume one fdfile_t to represent it. */ + + /* Allocate an fdfile_t to represent it. */ if (i >= NDFDFILE) { - ff2 = fflist; - fflist = (void *)ff2->ff_file; + ff2 = pool_cache_get(fdfile_cache, PR_WAITOK); + *nffp = ff2; + } else { + ff2 = newdt->dt_ff[i]; } ff2->ff_file = fp; ff2->ff_exclose = ff->ff_exclose; ff2->ff_allocated = true; - mutex_exit(&ff->ff_lock); - if (i > newlast) { - newlast = i; + + /* Fix up bitmaps. */ + j = i >> NDENTRYSHIFT; + KASSERT((newfdp->fd_lomap[j] & (1 << (i & NDENTRYMASK))) == 0); + newfdp->fd_lomap[j] |= 1 << (i & NDENTRYMASK); + if (__predict_false(newfdp->fd_lomap[j] == ~0)) { + KASSERT((newfdp->fd_himap[j >> NDENTRYSHIFT] & + (1 << (j & NDENTRYMASK))) == 0); + newfdp->fd_himap[j >> NDENTRYSHIFT] |= + 1 << (j & NDENTRYMASK); } + newlast = i; } - mutex_exit(&fdp->fd_lock); - - /* Discard unused fdfile_t structures. */ - while (__predict_false(fflist != NULL)) { - ff = fflist; - fflist = (void *)ff->ff_file; - ff->ff_file = NULL; - pool_cache_put(fdfile_cache, ff); - nused--; - } - KASSERT(nused >= 0); - KASSERT(newfdp->fd_ofiles[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); - - newfdp->fd_nused = nused; + KASSERT(newdt->dt_ff[0] == (fdfile_t *)newfdp->fd_dfdfile[0]); newfdp->fd_lastfile = newlast; - + fd_checkmaps(newfdp); + mutex_exit(&fdp->fd_lock); + return (newfdp); } @@ -1472,32 +1463,46 @@ fd_free(void) filedesc_t *fdp; fdfile_t *ff; file_t *fp; - int fd, lastfd; - void **discard; + int fd, nf; + fdtab_t *dt; fdp = curlwp->l_fd; - KASSERT(fdp->fd_ofiles[0] == (fdfile_t *)fdp->fd_dfdfile[0]); + KASSERT(fdp->fd_dt->dt_ff[0] == (fdfile_t *)fdp->fd_dfdfile[0]); + KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); + KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); +#ifndef __HAVE_ATOMIC_AS_MEMBAR + membar_exit(); +#endif if (atomic_dec_uint_nv(&fdp->fd_refcnt) > 0) return; /* * Close any files that the process holds open. */ - for (fd = 0, lastfd = fdp->fd_nfiles - 1; fd <= lastfd; fd++) { - ff = fdp->fd_ofiles[fd]; + dt = fdp->fd_dt; + fd_checkmaps(fdp); + for (fd = 0, nf = dt->dt_nfiles; fd < nf; fd++) { + ff = dt->dt_ff[fd]; KASSERT(fd >= NDFDFILE || ff == (fdfile_t *)fdp->fd_dfdfile[fd]); - if ((ff = fdp->fd_ofiles[fd]) == NULL) + if (ff == NULL) continue; if ((fp = ff->ff_file) != NULL) { /* - * Must use fd_close() here as kqueue holds - * long term references to descriptors. + * Must use fd_close() here if there is + * a reference from kqueue. */ - ff->ff_refcnt++; - fd_close(fd); + if (__predict_true(ff->ff_refcnt == 0)) { + ff->ff_file = NULL; + ff->ff_exclose = false; + ff->ff_allocated = false; + closef(fp); + } else { + ff->ff_refcnt++; + fd_close(fd); + } } KASSERT(ff->ff_refcnt == 0); KASSERT(ff->ff_file == NULL); @@ -1505,6 +1510,7 @@ fd_free(void) KASSERT(!ff->ff_allocated); if (fd >= NDFDFILE) { pool_cache_put(fdfile_cache, ff); + dt->dt_ff[fd] = NULL; } } @@ -1512,27 +1518,36 @@ fd_free(void) * Clean out the descriptor table for the next user and return * to the cache. */ - while ((discard = fdp->fd_discard) != NULL) { - fdp->fd_discard = discard[0]; - kmem_free(discard, (uintptr_t)discard[1]); + if (__predict_false(dt != &fdp->fd_dtbuiltin)) { + fd_dtab_free(fdp->fd_dt); + /* Otherwise, done above. */ + memset(&fdp->fd_dtbuiltin.dt_ff[NDFDFILE], 0, + (NDFILE - NDFDFILE) * sizeof(fdp->fd_dtbuiltin.dt_ff[0])); + fdp->fd_dt = &fdp->fd_dtbuiltin; } - if (NDHISLOTS(fdp->fd_nfiles) > NDHISLOTS(NDFILE)) { + if (__predict_false(NDHISLOTS(nf) > NDHISLOTS(NDFILE))) { KASSERT(fdp->fd_himap != fdp->fd_dhimap); KASSERT(fdp->fd_lomap != fdp->fd_dlomap); - fd_map_free(fdp->fd_nfiles, fdp->fd_lomap, fdp->fd_himap); + fd_map_free(nf, fdp->fd_lomap, fdp->fd_himap); } - if (fdp->fd_nfiles > NDFILE) { - KASSERT(fdp->fd_ofiles != fdp->fd_dfiles); - fd_ofile_free(fdp->fd_nfiles, fdp->fd_ofiles); - } - if (fdp->fd_knhash != NULL) { + if (__predict_false(fdp->fd_knhash != NULL)) { hashdone(fdp->fd_knhash, HASH_LIST, fdp->fd_knhashmask); fdp->fd_knhash = NULL; fdp->fd_knhashmask = 0; } else { KASSERT(fdp->fd_knhashmask == 0); } + fdp->fd_dt = &fdp->fd_dtbuiltin; fdp->fd_lastkqfile = -1; + fdp->fd_lastfile = -1; + fdp->fd_freefile = 0; + fdp->fd_exclose = false; + memset(&fdp->fd_startzero, 0, sizeof(*fdp) - + offsetof(filedesc_t, fd_startzero)); + KASSERT(fdp->fd_dtbuiltin.dt_nfiles == NDFILE); + KASSERT(fdp->fd_dtbuiltin.dt_link == NULL); + KASSERT(fdp->fd_dt == &fdp->fd_dtbuiltin); + fd_checkmaps(fdp); pool_cache_put(filedesc_cache, fdp); } @@ -1569,12 +1584,14 @@ fd_dupopen(int old, int *new, int mode, int error) filedesc_t *fdp; fdfile_t *ff; file_t *fp; + fdtab_t *dt; if ((fp = fd_getfile(old)) == NULL) { return EBADF; } fdp = curlwp->l_fd; - ff = fdp->fd_ofiles[old]; + dt = fdp->fd_dt; + ff = dt->dt_ff[old]; /* * There are two cases of interest here. @@ -1600,12 +1617,12 @@ fd_dupopen(int old, int *new, int mode, int error) } /* Copy it. */ - error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); + error = fd_dup(fp, 0, new, ff->ff_exclose); break; case EMOVEFD: /* Copy it. */ - error = fd_dup(fp, 0, new, fdp->fd_ofiles[old]->ff_exclose); + error = fd_dup(fp, 0, new, ff->ff_exclose); if (error != 0) { break; } diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index d7e908759267..6c04fd573522 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_event.c,v 1.64 2009/04/04 10:12:51 ad Exp $ */ +/* $NetBSD: kern_event.c,v 1.65 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc. @@ -58,7 +58,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.64 2009/04/04 10:12:51 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_event.c,v 1.65 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -879,7 +879,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev) kmem_free(newkn, sizeof(*newkn)); return EBADF; } - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; if (fd <= fdp->fd_lastkqfile) { SLIST_FOREACH(kn, &ff->ff_knlist, kn_link) { if (kq == kn->kn_kq && @@ -944,7 +944,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev) } else { /* Otherwise, knote is on an fd. */ list = (struct klist *) - &fdp->fd_ofiles[kn->kn_id]->ff_knlist; + &fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist; if ((int)kn->kn_id > fdp->fd_lastkqfile) fdp->fd_lastkqfile = kn->kn_id; } @@ -1377,7 +1377,7 @@ kqueue_close(file_t *fp) mutex_enter(&fdp->fd_lock); for (i = 0; i <= fdp->fd_lastkqfile; i++) { - if ((ff = fdp->fd_ofiles[i]) == NULL) + if ((ff = fdp->fd_dt->dt_ff[i]) == NULL) continue; kqueue_doclose(kq, (struct klist *)&ff->ff_knlist, i); } @@ -1452,7 +1452,7 @@ knote_fdclose(int fd) filedesc_t *fdp; fdp = curlwp->l_fd; - list = (struct klist *)&fdp->fd_ofiles[fd]->ff_knlist; + list = (struct klist *)&fdp->fd_dt->dt_ff[fd]->ff_knlist; mutex_enter(&fdp->fd_lock); while ((kn = SLIST_FIRST(list)) != NULL) { knote_detach(kn, fdp, true); @@ -1485,7 +1485,7 @@ knote_detach(struct knote *kn, filedesc_t *fdp, bool dofop) /* Remove from descriptor table. */ if (kn->kn_fop->f_isfd) - list = (struct klist *)&fdp->fd_ofiles[kn->kn_id]->ff_knlist; + list = (struct klist *)&fdp->fd_dt->dt_ff[kn->kn_id]->ff_knlist; else list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c index cc042adffc81..aedc9ec6d440 100644 --- a/sys/kern/kern_sig.c +++ b/sys/kern/kern_sig.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_sig.c,v 1.297 2009/03/29 05:02:46 rmind Exp $ */ +/* $NetBSD: kern_sig.c,v 1.298 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -66,7 +66,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_sig.c,v 1.297 2009/03/29 05:02:46 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_sig.c,v 1.298 2009/05/24 21:41:26 ad Exp $"); #include "opt_ptrace.h" #include "opt_compat_sunos.h" @@ -986,6 +986,7 @@ kpsignal(struct proc *p, ksiginfo_t *ksi, void *data) { fdfile_t *ff; file_t *fp; + fdtab_t *dt; KASSERT(!cpu_intr_p()); KASSERT(mutex_owned(proc_lock)); @@ -996,8 +997,9 @@ kpsignal(struct proc *p, ksiginfo_t *ksi, void *data) /* XXXSMP locking */ ksi->ksi_fd = -1; - for (fd = 0; fd < fdp->fd_nfiles; fd++) { - if ((ff = fdp->fd_ofiles[fd]) == NULL) + dt = fdp->fd_dt; + for (fd = 0; fd < dt->dt_nfiles; fd++) { + if ((ff = dt->dt_ff[fd]) == NULL) continue; if ((fp = ff->ff_file) == NULL) continue; diff --git a/sys/kern/subr_exec_fd.c b/sys/kern/subr_exec_fd.c index 40861b6f600c..598e29f85ccc 100644 --- a/sys/kern/subr_exec_fd.c +++ b/sys/kern/subr_exec_fd.c @@ -1,4 +1,4 @@ -/* $NetBSD: subr_exec_fd.c,v 1.1 2008/11/18 13:01:41 pooka Exp $ */ +/* $NetBSD: subr_exec_fd.c,v 1.2 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -31,7 +31,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: subr_exec_fd.c,v 1.1 2008/11/18 13:01:41 pooka Exp $"); +__KERNEL_RCSID(0, "$NetBSD: subr_exec_fd.c,v 1.2 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -51,6 +51,7 @@ fd_closeexec(void) filedesc_t *fdp; fdfile_t *ff; lwp_t *l; + fdtab_t *dt; int fd; l = curlwp; @@ -73,9 +74,10 @@ fd_closeexec(void) return; } fdp->fd_exclose = false; + dt = fdp->fd_dt; for (fd = 0; fd <= fdp->fd_lastfile; fd++) { - if ((ff = fdp->fd_ofiles[fd]) == NULL) { + if ((ff = dt->dt_ff[fd]) == NULL) { KASSERT(fd >= NDFDFILE); continue; } @@ -111,6 +113,7 @@ fd_checkstd(void) struct nameidata nd; filedesc_t *fdp; file_t *fp; + fdtab_t *dt; struct proc *pp; int fd, i, error, flags = FREAD|FWRITE; char closed[CHECK_UPTO * 3 + 1], which[3 + 1]; @@ -119,10 +122,11 @@ fd_checkstd(void) closed[0] = '\0'; if ((fdp = p->p_fd) == NULL) return (0); + dt = fdp->fd_dt; for (i = 0; i < CHECK_UPTO; i++) { KASSERT(i >= NDFDFILE || - fdp->fd_ofiles[i] == (fdfile_t *)fdp->fd_dfdfile[i]); - if (fdp->fd_ofiles[i]->ff_file != NULL) + dt->dt_ff[i] == (fdfile_t *)fdp->fd_dfdfile[i]); + if (dt->dt_ff[i]->ff_file != NULL) continue; snprintf(which, sizeof(which), ",%d", i); strlcat(closed, which, sizeof(closed)); diff --git a/sys/kern/sys_aio.c b/sys/kern/sys_aio.c index 91a908ef91af..de91b390ba65 100644 --- a/sys/kern/sys_aio.c +++ b/sys/kern/sys_aio.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_aio.c,v 1.23 2009/02/22 20:28:06 ad Exp $ */ +/* $NetBSD: sys_aio.c,v 1.24 2009/05/24 21:41:26 ad Exp $ */ /* * Copyright (c) 2007, Mindaugas Rasiukevicius @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.23 2009/02/22 20:28:06 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_aio.c,v 1.24 2009/05/24 21:41:26 ad Exp $"); #ifdef _KERNEL_OPT #include "opt_ddb.h" @@ -622,15 +622,16 @@ sys_aio_cancel(struct lwp *l, const struct sys_aio_cancel_args *uap, register_t struct lio_req *lio; struct filedesc *fdp = p->p_fd; unsigned int cn, errcnt, fildes; + fdtab_t *dt; TAILQ_HEAD(, aio_job) tmp_jobs_list; /* Check for invalid file descriptor */ fildes = (unsigned int)SCARG(uap, fildes); - if (fildes >= fdp->fd_nfiles) + dt = fdp->fd_dt; + if (fildes >= dt->dt_nfiles) return EBADF; - membar_consumer(); - if (fdp->fd_ofiles[fildes] == NULL || fdp->fd_ofiles[fildes]->ff_file == NULL) + if (dt->dt_ff[fildes] == NULL || dt->dt_ff[fildes]->ff_file == NULL) return EBADF; /* Check if AIO structure is initialized */ diff --git a/sys/kern/sys_descrip.c b/sys/kern/sys_descrip.c index 9678a8f0789b..193b19ca4acf 100644 --- a/sys/kern/sys_descrip.c +++ b/sys/kern/sys_descrip.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_descrip.c,v 1.12 2009/03/28 21:42:19 rmind Exp $ */ +/* $NetBSD: sys_descrip.c,v 1.13 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -67,7 +67,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.12 2009/03/28 21:42:19 rmind Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.13 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -356,7 +356,7 @@ sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval) if ((fp = fd_getfile(fd)) == NULL) return (EBADF); - ff = fdp->fd_ofiles[fd]; + ff = fdp->fd_dt->dt_ff[fd]; if ((cmd & F_FSCTL)) { error = fcntl_forfs(fd, fp, cmd, SCARG(uap, arg)); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 92f502e009d9..d3211f962046 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_generic.c,v 1.122 2009/05/17 10:08:38 ad Exp $ */ +/* $NetBSD: sys_generic.c,v 1.123 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.122 2009/05/17 10:08:38 ad Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.123 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -543,7 +543,7 @@ sys_ioctl(struct lwp *l, const struct sys_ioctl_args *uap, register_t *retval) goto out; } - ff = fdp->fd_ofiles[SCARG(uap, fd)]; + ff = fdp->fd_dt->dt_ff[SCARG(uap, fd)]; switch (com = SCARG(uap, com)) { case FIONCLEX: ff->ff_exclose = false; diff --git a/sys/kern/sys_select.c b/sys/kern/sys_select.c index dce26a7eafe6..3bb719eb2b01 100644 --- a/sys/kern/sys_select.c +++ b/sys/kern/sys_select.c @@ -1,4 +1,4 @@ -/* $NetBSD: sys_select.c,v 1.14 2009/03/29 19:21:19 christos Exp $ */ +/* $NetBSD: sys_select.c,v 1.15 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2007, 2008, 2009 The NetBSD Foundation, Inc. @@ -70,7 +70,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.14 2009/03/29 19:21:19 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD: sys_select.c,v 1.15 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -218,7 +218,7 @@ selcommon(lwp_t *l, register_t *retval, int nd, fd_set *u_in, sizeof(fd_mask) * 6]; proc_t * const p = l->l_proc; char *bits; - int ncoll, error, timo; + int ncoll, error, timo, nf; size_t ni; sigset_t oldmask; struct timespec sleepts; @@ -228,9 +228,10 @@ selcommon(lwp_t *l, register_t *retval, int nd, fd_set *u_in, error = 0; if (nd < 0) return (EINVAL); - if (nd > p->p_fd->fd_nfiles) { + nf = p->p_fd->fd_dt->dt_nfiles; + if (nd > nf) { /* forgiving; slightly wrong */ - nd = p->p_fd->fd_nfiles; + nd = nf; } ni = howmany(nd, NFDBITS) * sizeof(fd_mask); if (ni * 6 > sizeof(smallbits)) { @@ -427,14 +428,15 @@ pollcommon(lwp_t *l, register_t *retval, struct pollfd *u_fds, u_int nfds, proc_t * const p = l->l_proc; sigset_t oldmask; int ncoll, error, timo; - size_t ni; + size_t ni, nf; struct timespec sleepts; selcpu_t *sc; kmutex_t *lock; - if (nfds > p->p_fd->fd_nfiles) { + nf = p->p_fd->fd_dt->dt_nfiles; + if (nfds > nf) { /* forgiving; slightly wrong */ - nfds = p->p_fd->fd_nfiles; + nfds = nf; } ni = nfds * sizeof(struct pollfd); if (ni > sizeof(smallfds)) { diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index bb21745e955d..db68b33005c5 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $NetBSD: uipc_usrreq.c,v 1.125 2009/05/04 06:02:40 yamt Exp $ */ +/* $NetBSD: uipc_usrreq.c,v 1.126 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 1998, 2000, 2004, 2008, 2009 The NetBSD Foundation, Inc. @@ -96,7 +96,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.125 2009/05/04 06:02:40 yamt Exp $"); +__KERNEL_RCSID(0, "$NetBSD: uipc_usrreq.c,v 1.126 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -1338,7 +1338,7 @@ unp_internalize(struct mbuf **controlp) fdp = (int *)CMSG_DATA(cm) + nfds; rp = files + nfds; for (i = 0; i < nfds; i++) { - fp = fdescp->fd_ofiles[*--fdp]->ff_file; + fp = fdescp->fd_dt->dt_ff[*--fdp]->ff_file; KASSERT(fp != NULL); mutex_enter(&fp->f_lock); *--rp = fp; diff --git a/sys/miscfs/fdesc/fdesc_vfsops.c b/sys/miscfs/fdesc/fdesc_vfsops.c index 754ddfa67aa7..35d9d9be0bb2 100644 --- a/sys/miscfs/fdesc/fdesc_vfsops.c +++ b/sys/miscfs/fdesc/fdesc_vfsops.c @@ -1,4 +1,4 @@ -/* $NetBSD: fdesc_vfsops.c,v 1.79 2009/03/14 15:36:22 dsl Exp $ */ +/* $NetBSD: fdesc_vfsops.c,v 1.80 2009/05/24 21:41:26 ad Exp $ */ /* * Copyright (c) 1992, 1993, 1995 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fdesc_vfsops.c,v 1.79 2009/03/14 15:36:22 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fdesc_vfsops.c,v 1.80 2009/05/24 21:41:26 ad Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_netbsd.h" @@ -164,13 +164,13 @@ fdesc_root(struct mount *mp, struct vnode **vpp) int fdesc_statvfs(struct mount *mp, struct statvfs *sbp) { - struct lwp *l = curlwp; - struct filedesc *fdp; - struct proc *p; + lwp_t *l = curlwp; + proc_t *p; int lim; int i; int last; int freefd; + fdtab_t *dt; /* * Compute number of free file descriptors. @@ -179,20 +179,20 @@ fdesc_statvfs(struct mount *mp, struct statvfs *sbp) * of open files... ] */ p = l->l_proc; + dt = l->l_fd->fd_dt; lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur; - fdp = p->p_fd; - last = min(fdp->fd_nfiles, lim); + last = min(dt->dt_nfiles, lim); freefd = 0; - for (i = fdp->fd_freefile; i < last; i++) - if (fdp->fd_ofiles[i] == NULL) + for (i = l->l_fd->fd_freefile; i < last; i++) + if (dt->dt_ff[i]->ff_file == NULL) freefd++; /* * Adjust for the fact that the fdesc array may not * have been fully allocated yet. */ - if (fdp->fd_nfiles < lim) - freefd += (lim - fdp->fd_nfiles); + if (dt->dt_nfiles < lim) + freefd += (lim - dt->dt_nfiles); sbp->f_bsize = DEV_BSIZE; sbp->f_frsize = DEV_BSIZE; diff --git a/sys/miscfs/fdesc/fdesc_vnops.c b/sys/miscfs/fdesc/fdesc_vnops.c index 3f1542377430..c753374beed0 100644 --- a/sys/miscfs/fdesc/fdesc_vnops.c +++ b/sys/miscfs/fdesc/fdesc_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: fdesc_vnops.c,v 1.106 2009/03/15 17:22:37 cegger Exp $ */ +/* $NetBSD: fdesc_vnops.c,v 1.107 2009/05/24 21:41:26 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -41,7 +41,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: fdesc_vnops.c,v 1.106 2009/03/15 17:22:37 cegger Exp $"); +__KERNEL_RCSID(0, "$NetBSD: fdesc_vnops.c,v 1.107 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -276,12 +276,13 @@ fdesc_lookup(void *v) struct lwp *l = curlwp; const char *pname = cnp->cn_nameptr; struct proc *p = l->l_proc; - filedesc_t *fdp = p->p_fd; - int numfiles = fdp->fd_nfiles; unsigned fd = 0; int error; struct vnode *fvp; const char *ln; + fdtab_t *dt; + + dt = curlwp->l_fd->fd_dt; if (cnp->cn_namelen == 1 && *pname == '.') { *vpp = dvp; @@ -370,7 +371,7 @@ fdesc_lookup(void *v) fd = 0; while (*pname >= '0' && *pname <= '9') { fd = 10 * fd + *pname++ - '0'; - if (fd >= numfiles) + if (fd >= dt->dt_nfiles) break; } @@ -379,14 +380,11 @@ fdesc_lookup(void *v) goto bad; } - mutex_enter(&fdp->fd_lock); - if (fd >= numfiles ||fdp->fd_ofiles[fd] == NULL || - fdp->fd_ofiles[fd]->ff_file == NULL) { - mutex_exit(&fdp->fd_lock); + if (fd >= dt->dt_nfiles || dt->dt_ff[fd] == NULL || + dt->dt_ff[fd]->ff_file == NULL) { error = EBADF; goto bad; } - mutex_exit(&fdp->fd_lock); error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp); if (error) @@ -650,12 +648,12 @@ fdesc_readdir(void *v) } */ *ap = v; struct uio *uio = ap->a_uio; struct dirent d; - filedesc_t *fdp; off_t i; int j; int error; off_t *cookies = NULL; int ncookies; + fdtab_t *dt; switch (VTOFDESC(ap->a_vp)->fd_type) { case Fctty: @@ -668,7 +666,7 @@ fdesc_readdir(void *v) break; } - fdp = curproc->p_fd; + dt = curlwp->l_fd->fd_dt; if (uio->uio_resid < UIO_MX) return EINVAL; @@ -709,14 +707,11 @@ fdesc_readdir(void *v) case FD_STDIN: case FD_STDOUT: case FD_STDERR: - if (fdp == NULL) - continue; if ((ft->ft_fileno - FD_STDIN) >= - fdp->fd_nfiles) + dt->dt_nfiles) continue; - membar_consumer(); - if (fdp->fd_ofiles[ft->ft_fileno - FD_STDIN] - == NULL || fdp->fd_ofiles[ft->ft_fileno - + if (dt->dt_ff[ft->ft_fileno - FD_STDIN] + == NULL || dt->dt_ff[ft->ft_fileno - FD_STDIN]->ff_file == NULL) continue; break; @@ -733,16 +728,15 @@ fdesc_readdir(void *v) *cookies++ = i + 1; } } else { - int nfdp = fdp ? fdp->fd_nfiles : 0; membar_consumer(); if (ap->a_ncookies) { - ncookies = min(ncookies, nfdp + 2); + ncookies = min(ncookies, dt->dt_nfiles + 2); cookies = malloc(ncookies * sizeof(off_t), M_TEMP, M_WAITOK); *ap->a_cookies = cookies; *ap->a_ncookies = ncookies; } - for (; i - 2 < nfdp && uio->uio_resid >= UIO_MX; i++) { + for (; i - 2 < dt->dt_nfiles && uio->uio_resid >= UIO_MX; i++) { switch (i) { case 0: case 1: @@ -754,10 +748,9 @@ fdesc_readdir(void *v) break; default: - KASSERT(fdp != NULL); j = (int)i - 2; - if (fdp == NULL || fdp->fd_ofiles[j] == NULL || - fdp->fd_ofiles[j]->ff_file == NULL) + if (dt->dt_ff[j] == NULL || + dt->dt_ff[j]->ff_file == NULL) continue; d.d_fileno = j + FD_STDIN; d.d_namlen = sprintf(d.d_name, "%d", j); diff --git a/sys/miscfs/portal/portal_vnops.c b/sys/miscfs/portal/portal_vnops.c index e66e0a9b41c8..1416a1a1c9c8 100644 --- a/sys/miscfs/portal/portal_vnops.c +++ b/sys/miscfs/portal/portal_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: portal_vnops.c,v 1.82 2009/03/14 15:36:23 dsl Exp $ */ +/* $NetBSD: portal_vnops.c,v 1.83 2009/05/24 21:41:26 ad Exp $ */ /* * Copyright (c) 1992, 1993 @@ -40,7 +40,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: portal_vnops.c,v 1.82 2009/03/14 15:36:23 dsl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: portal_vnops.c,v 1.83 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -483,7 +483,7 @@ portal_open(void *v) * Check that the mode the file is being opened for is a subset * of the mode of the existing descriptor. */ - fp = l->l_proc->p_fd->fd_ofiles[fd]->ff_file; + fp = l->l_fd->fd_dt->dt_ff[fd]->ff_file; if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { portal_closefd(l, fd); /* XXXNJWLWP */ error = EACCES; diff --git a/sys/miscfs/procfs/procfs_vnops.c b/sys/miscfs/procfs/procfs_vnops.c index ae3f1c6f935c..a534c3dfc25a 100644 --- a/sys/miscfs/procfs/procfs_vnops.c +++ b/sys/miscfs/procfs/procfs_vnops.c @@ -1,4 +1,4 @@ -/* $NetBSD: procfs_vnops.c,v 1.173 2008/12/17 20:51:36 cegger Exp $ */ +/* $NetBSD: procfs_vnops.c,v 1.174 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2006, 2007, 2008 The NetBSD Foundation, Inc. @@ -105,7 +105,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.173 2008/12/17 20:51:36 cegger Exp $"); +__KERNEL_RCSID(0, "$NetBSD: procfs_vnops.c,v 1.174 2009/05/24 21:41:26 ad Exp $"); #include #include @@ -1338,7 +1338,7 @@ procfs_readdir(void *v) return ESRCH; } - nfd = p->p_fd->fd_nfiles; + nfd = p->p_fd->fd_dt->dt_nfiles; lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfiles); if (i >= lim) { diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index e3b02e1ba400..63f24f7f8c1a 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -1,4 +1,4 @@ -/* $NetBSD: filedesc.h,v 1.54 2009/05/23 18:28:05 ad Exp $ */ +/* $NetBSD: filedesc.h,v 1.55 2009/05/24 21:41:26 ad Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -94,8 +94,8 @@ * in use. Locks: * * : unlocked + * a atomic operations + filedesc_t::fd_lock in some cases * d filedesc_t::fd_lock - * f fdfile_t::ff_lock, may be stable if reference held * * Note that ff_exclose and ff_allocated are likely to be byte sized * (bool). In general adjacent sub-word sized fields must be locked @@ -104,19 +104,29 @@ * it's invalid. */ typedef struct fdfile { - kmutex_t ff_lock; /* :: lock on structure */ bool ff_exclose; /* :: close on exec flag */ bool ff_allocated; /* d: descriptor slot is allocated */ - u_int ff_refcnt; /* f: reference count on structure */ - struct file *ff_file; /* f: pointer to file if open */ - SLIST_HEAD(,knote) ff_knlist; /* f: knotes attached to this fd */ - kcondvar_t ff_closing; /* f: notifier for close */ + u_int ff_refcnt; /* a: reference count on structure */ + struct file *ff_file; /* d: pointer to file if open */ + SLIST_HEAD(,knote) ff_knlist; /* d: knotes attached to this fd */ + kcondvar_t ff_closing; /* d: notifier for close */ } fdfile_t; /* Reference count */ #define FR_CLOSING (0x80000000) /* closing: must interlock */ #define FR_MASK (~FR_CLOSING) /* reference count */ +/* + * Open file table, potentially many 'active' tables per filedesc_t + * in a multi-threaded process, or with a shared filedesc_t (clone()). + * nfiles is first to avoid pointer arithmetic. + */ +typedef struct fdtab { + u_int dt_nfiles; /* number of open files allocated */ + struct fdtab *dt_link; /* for lists of dtab */ + fdfile_t *dt_ff[NDFILE]; /* file structures for open fds */ +} fdtab_t; + typedef struct filedesc { /* * Built-in fdfile_t records first, since they have strict @@ -127,29 +137,27 @@ typedef struct filedesc { * All of the remaining fields are locked by fd_lock. */ kmutex_t fd_lock; /* lock on structure */ - fdfile_t **fd_ofiles; /* file structures for open files */ + fdtab_t * volatile fd_dt; /* active descriptor table */ uint32_t *fd_himap; /* each bit points to 32 fds */ uint32_t *fd_lomap; /* bitmap of free fds */ - void *fd_discard; /* old fd_ofiles tables to discard */ struct klist *fd_knhash; /* hash of attached non-fd knotes */ int fd_lastkqfile; /* max descriptor for kqueue */ int fd_lastfile; /* high-water mark of fd_ofiles */ int fd_refcnt; /* reference count */ - int fd_nfiles; /* number of open files allocated */ u_long fd_knhashmask; /* size of fd_knhash */ -#define fd_startzero fd_freefile /* area to zero on return to cache */ int fd_freefile; /* approx. next free file */ - int fd_nused; /* number of slots in use */ + int fd_unused; /* unused */ bool fd_exclose; /* non-zero if >0 fd with EXCLOSE */ /* - * These arrays are used when the number of open files is + * This structure is used when the number of open files is * <= NDFILE, and are then pointed to by the pointers above. */ - fdfile_t *fd_dfiles[NDFILE]; + fdtab_t fd_dtbuiltin; /* * These arrays are used when the number of open files is * <= 1024, and are then pointed to by the pointers above. */ +#define fd_startzero fd_dhimap /* area to zero on return to cache */ uint32_t fd_dhimap[NDENTRIES >> NDENTRYSHIFT]; uint32_t fd_dlomap[NDENTRIES]; } filedesc_t; @@ -196,9 +204,6 @@ int fd_getsock(unsigned, struct socket **); void fd_putvnode(unsigned); void fd_putsock(unsigned); int fd_close(unsigned); -void fd_used(filedesc_t *, unsigned); -void fd_unused(filedesc_t *, unsigned); -bool fd_isused(filedesc_t *, unsigned); int fd_dup(file_t *, int, int *, bool); int fd_dup2(file_t *, unsigned); int fd_clone(file_t *, unsigned, int, const struct fileops *, void *); @@ -226,6 +231,7 @@ int do_fcntl_lock(int, int, struct flock *); int do_posix_fadvise(int, off_t, off_t, int); extern kmutex_t filelist_lock; +extern filedesc_t filedesc0; #endif /* _KERNEL */ diff --git a/usr.bin/fstat/fstat.c b/usr.bin/fstat/fstat.c index 726a482ea966..c47074d7a2f0 100644 --- a/usr.bin/fstat/fstat.c +++ b/usr.bin/fstat/fstat.c @@ -1,4 +1,4 @@ -/* $NetBSD: fstat.c,v 1.88 2009/04/12 06:36:12 lukem Exp $ */ +/* $NetBSD: fstat.c,v 1.89 2009/05/24 21:41:44 ad Exp $ */ /*- * Copyright (c) 1988, 1993 @@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1988, 1993\ #if 0 static char sccsid[] = "@(#)fstat.c 8.3 (Berkeley) 5/2/95"; #else -__RCSID("$NetBSD: fstat.c,v 1.88 2009/04/12 06:36:12 lukem Exp $"); +__RCSID("$NetBSD: fstat.c,v 1.89 2009/05/24 21:41:44 ad Exp $"); #endif #endif /* not lint */ @@ -313,6 +313,7 @@ dofiles(struct kinfo_proc2 *p) int i; struct filedesc filed; struct cwdinfo cwdi; + struct fdtab dt; Uname = user_from_uid(p->p_uid, 0); Pid = p->p_pid; @@ -321,16 +322,20 @@ dofiles(struct kinfo_proc2 *p) if (p->p_fd == 0 || p->p_cwdi == 0) return; if (!KVM_READ(p->p_fd, &filed, sizeof (filed))) { - warnx("can't read filedesc at %#llx for pid %d", (unsigned long long)p->p_fd, Pid); + warnx("can't read filedesc at %p for pid %d", (void *)(uintptr_t)p->p_fd, Pid); return; } if (!KVM_READ(p->p_cwdi, &cwdi, sizeof(cwdi))) { - warnx("can't read cwdinfo at %#llx for pid %d", (unsigned long long)p->p_cwdi, Pid); + warnx("can't read cwdinfo at %p for pid %d", (void *)(uintptr_t)p->p_cwdi, Pid); return; } - if (filed.fd_nfiles < 0 || filed.fd_lastfile >= filed.fd_nfiles || + if (!KVM_READ(filed.fd_dt, &dt, sizeof(dt))) { + warnx("can't read dtab at %p for pid %d", filed.fd_dt, Pid); + return; + } + if ((unsigned)filed.fd_lastfile >= dt.dt_nfiles || filed.fd_freefile > filed.fd_lastfile + 1) { - dprintf("filedesc corrupted at %#llx for pid %d", (unsigned long long)p->p_fd, Pid); + dprintf("filedesc corrupted at %p for pid %d", (void *)(uintptr_t)p->p_fd, Pid); return; } /* @@ -355,10 +360,10 @@ dofiles(struct kinfo_proc2 *p) */ #define FPSIZE (sizeof (fdfile_t *)) ALLOC_OFILES(filed.fd_lastfile+1); - if (!KVM_READ(filed.fd_ofiles, ofiles, + if (!KVM_READ(&filed.fd_dt->dt_ff, ofiles, (filed.fd_lastfile+1) * FPSIZE)) { dprintf("can't read file structures at %p for pid %d", - filed.fd_ofiles, Pid); + &filed.fd_dt->dt_ff, Pid); return; } for (i = 0; i <= filed.fd_lastfile; i++) {