Additional fixes/enhancements:

    1) Comply with the way buffercache(9) is intended to be used. We now
       read single blocks of EFS_BB_SIZE, never pulling in a
       variable-length extent with a single bread() call.

    2) Handle symlinks with more than one extent. There's no reason for
       this to ever happen, but it's handled now.

    3) Finally, add a hint to our iteration initialiser so we can start
       from the desired offset, rather than naively looping through from
       the beginning each time. Since we can binary search the correct
       location quickly, this improves large sequential reads by about
       40% with 128MB files. Improvement should increase with file size.
This commit is contained in:
rumble 2007-07-04 19:24:09 +00:00
parent 933c2e67b5
commit 0aef936cbe
5 changed files with 264 additions and 141 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: efs_extent.h,v 1.2 2007/06/30 15:56:16 rumble Exp $ */
/* $NetBSD: efs_extent.h,v 1.3 2007/07/04 19:24:09 rumble Exp $ */
/*
* Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
@ -59,4 +59,6 @@ struct efs_extent {
#define EFS_EXTENT_BN_MASK 0x00ffffff
#define EFS_EXTENT_OFFSET_MASK 0x00ffffff
#define EFS_EXTENTS_PER_BB (EFS_BB_SIZE / sizeof(struct efs_dextent))
#endif /* !_FS_EFS_EFS_EXTENT_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: efs_subr.c,v 1.1 2007/06/29 23:30:29 rumble Exp $ */
/* $NetBSD: efs_subr.c,v 1.2 2007/07/04 19:24:09 rumble Exp $ */
/*
* Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
@ -17,7 +17,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.1 2007/06/29 23:30:29 rumble Exp $");
__KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.2 2007/07/04 19:24:09 rumble Exp $");
#include <sys/param.h>
#include <sys/kauth.h>
@ -42,8 +42,6 @@ __KERNEL_RCSID(0, "$NetBSD: efs_subr.c,v 1.1 2007/06/29 23:30:29 rumble Exp $");
#include <fs/efs/efs_inode.h>
#include <fs/efs/efs_subr.h>
MALLOC_DECLARE(M_EFSTMP);
struct pool efs_inode_pool;
/*
@ -165,7 +163,7 @@ efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
sbp = &emp->em_sb;
efs_locate_inode(ino, sbp, &bboff, &index);
err = efs_bread(emp, bboff, EFS_BY2BB(EFS_DINODE_SIZE), l, &bp);
err = efs_bread(emp, bboff, l, &bp);
if (err) {
brelse(bp);
return (err);
@ -182,19 +180,16 @@ efs_read_inode(struct efs_mount *emp, ino_t ino, struct lwp *l,
* we as EFS block sizing.
*
* bboff: basic block offset
* nbb: number of basic blocks to be read
*
* Returns 0 on success.
*/
/*
* NOTE(review): this is a flattened diff hunk, so the removed and the added
* versions of the function are interleaved below without +/- markers.
* The variant taking nbb is the old one; the variant without it always
* requests exactly one basic block (EFS_BB_SIZE bytes) from bread().
*/
int
/* old signature: the caller chose how many basic blocks to read via nbb */
efs_bread(struct efs_mount *emp, uint32_t bboff, int nbb, struct lwp *l,
struct buf **bp)
/* new signature: nbb is gone */
efs_bread(struct efs_mount *emp, uint32_t bboff, struct lwp *l, struct buf **bp)
{
/* old only: nbb does not exist in the new signature */
KASSERT(nbb > 0);
KASSERT(bboff < EFS_SIZE_MAX);
/*
* bboff is scaled by (EFS_BB_SIZE / DEV_BSIZE) to form the block number
* handed to bread(); NOCRED is passed when no lwp is supplied.
*/
return (bread(emp->em_devvp, (daddr_t)bboff * (EFS_BB_SIZE / DEV_BSIZE),
/* old size argument: nbb basic blocks */
nbb * EFS_BB_SIZE, (l == NULL) ? NOCRED : l->l_cred, bp));
/* new size argument: a single basic block */
EFS_BB_SIZE, (l == NULL) ? NOCRED : l->l_cred, bp));
}
/*
@ -320,25 +315,25 @@ efs_extent_lookup(struct efs_mount *emp, struct efs_extent *ex,
int i, err;
/*
* Read in the entire extent, evaluating all of the dirblks until we
* find our entry. If we don't, return ENOENT.
* Read in each of the dirblks until we find our entry.
* If we don't, return ENOENT.
*/
err = efs_bread(emp, ex->ex_bn, ex->ex_length, NULL, &bp);
if (err) {
printf("efs: warning: invalid extent descriptor\n");
brelse(bp);
return (err);
}
for (i = 0; i < ex->ex_length; i++) {
db = ((struct efs_dirblk *)bp->b_data) + i;
err = efs_bread(emp, ex->ex_bn + i, NULL, &bp);
if (err) {
printf("efs: warning: invalid extent descriptor\n");
brelse(bp);
return (err);
}
db = (struct efs_dirblk *)bp->b_data;
if (efs_dirblk_lookup(db, cn, ino) == 0) {
brelse(bp);
return (0);
}
brelse(bp);
}
brelse(bp);
return (ENOENT);
}
@ -360,14 +355,12 @@ efs_inode_lookup(struct efs_mount *emp, struct efs_inode *ei,
KASSERT(efs_is_inode_synced(ei) == 0);
KASSERT((ei->ei_mode & S_IFMT) == S_IFDIR);
efs_extent_iterator_init(&exi, ei);
efs_extent_iterator_init(&exi, ei, 0);
while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
if (efs_extent_lookup(emp, &ex, cn, ino) == 0) {
efs_extent_iterator_free(&exi);
return (0);
}
}
efs_extent_iterator_free(&exi);
return ((ret == -1) ? ENOENT : ret);
}
@ -407,17 +400,167 @@ efs_extent_to_dextent(struct efs_extent *ex, struct efs_dextent *dex)
/*
* Initialise an extent iterator.
*
* If start_hint is non-0, attempt to set up the iterator beginning with the
* extent descriptor in which the start_hint'th byte exists. Callers must not
* expect success (this is simply an optimisation), so we reserve the right
* to start from the beginning.
*
* exi:        iterator to initialise
* eip:        inode whose extents will be walked
* start_hint: byte offset into the file to position at, or 0 for the start
*
* Nothing is returned. Every failure path below leaves the iterator
* positioned at extent 0; a hint at or beyond ei_size instead sets exi_next
* to ei_numextents.
*
* NOTE(review): this is a flattened diff hunk, so removed and added lines
* are interleaved below without +/- markers; the old lines are flagged.
*/
void
/* old signature: no start hint */
efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip)
/* new signature: adds start_hint */
efs_extent_iterator_init(struct efs_extent_iterator *exi, struct efs_inode *eip,
off_t start_hint)
{
struct efs_extent ex, ex2;
struct buf *bp;
struct efs_mount *emp = VFSTOEFS(eip->ei_vp->v_mount);
off_t offset, length, next;
int i, err, numextents, numinextents;
int hi, lo, mid;
int indir;
/* old initialisation: also reset the indirect extent cache fields */
exi->exi_eip = eip;
exi->exi_next = 0;
exi->exi_dnext = 0;
exi->exi_innext = 0;
exi->exi_incache = NULL;
exi->exi_nincache = 0;
/* new initialisation: the cache fields are no longer touched */
exi->exi_eip = eip;
exi->exi_next = 0;
exi->exi_dnext = 0;
exi->exi_innext = 0;
/* no hint: iterate from the first extent */
if (start_hint == 0)
return;
/* force iterator to end if hint is too big */
if (start_hint >= eip->ei_size) {
exi->exi_next = eip->ei_numextents;
return;
}
/*
* Use start_hint to jump to the right extent descriptor. We'll
* iterate over the 12 indirect extents because it's cheap, then
* bring the appropriate vector into core and binary search it.
*/
/*
* Handle the small file case separately first...
*/
if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
/* linear scan of the descriptors stored in the inode itself */
for (i = 0; i < eip->ei_numextents; i++) {
efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
/* ex_offset and ex_length are scaled by EFS_BB_SIZE to get bytes */
offset = ex.ex_offset * EFS_BB_SIZE;
length = ex.ex_length * EFS_BB_SIZE;
if (start_hint >= offset &&
start_hint < (offset + length)) {
/* logical index and direct index are both i here */
exi->exi_next = exi->exi_dnext = i;
return;
}
}
/* shouldn't get here, no? */
EFS_DPRINTF(("efs_extent_iterator_init: bad direct extents\n"));
return;
}
/*
* Now do the large files with indirect extents...
*
* The first indirect extent's ex_offset field contains the
* number of indirect extents used.
*/
efs_dextent_to_extent(&eip->ei_di.di_extents[0], &ex);
numinextents = ex.ex_offset;
if (numinextents < 1 || numinextents >= EFS_DIRECTEXTENTS) {
EFS_DPRINTF(("efs_extent_iterator_init: bad ex.ex_offset\n"));
return;
}
/*
* Pick the indirect extent that should hold the hint. For each one,
* read its first block and decode the first descriptor stored there;
* once that descriptor starts beyond start_hint, the previous indirect
* extent (or index 0) is chosen. On exit, "next" is the sum of the
* numextents values of the indirect extents before the chosen one and
* "numextents" is the value computed for the chosen one.
*
* NOTE(review): numextents is ex_length * EFS_EXTENTS_PER_BB, which
* presumably assumes every indirect extent before the last is
* completely full, and the clamp uses the inode's total descriptor
* count rather than the count remaining after "next". Confirm that
* the search below cannot probe unused slots.
*/
next = 0;
indir = -1;
numextents = 0;
for (i = 0; i < numinextents; i++) {
efs_dextent_to_extent(&eip->ei_di.di_extents[i], &ex);
err = efs_bread(emp, ex.ex_bn, NULL, &bp);
if (err) {
brelse(bp);
return;
}
efs_dextent_to_extent((struct efs_dextent *)bp->b_data, &ex2);
brelse(bp);
offset = ex2.ex_offset * EFS_BB_SIZE;
if (offset > start_hint) {
indir = MAX(0, i - 1);
break;
}
next += numextents;
numextents = ex.ex_length * EFS_EXTENTS_PER_BB;
numextents = MIN(numextents, eip->ei_numextents);
}
/*
* We hit the end, so assume it's in the last extent.
*/
if (indir == -1)
indir = numinextents - 1;
/*
* Binary search to find our desired direct extent.
*/
lo = 0;
mid = 0;
hi = numextents - 1;
efs_dextent_to_extent(&eip->ei_di.di_extents[indir], &ex);
while (lo <= hi) {
int bboff;
int index;
mid = (lo + hi) / 2;
/* block within the indirect extent, and slot within that block */
bboff = mid / EFS_EXTENTS_PER_BB;
index = mid % EFS_EXTENTS_PER_BB;
/* each probe reads the single block holding descriptor "mid" */
err = efs_bread(emp, ex.ex_bn + bboff, NULL, &bp);
if (err) {
brelse(bp);
EFS_DPRINTF(("efs_extent_iterator_init: bsrch read\n"));
return;
}
efs_dextent_to_extent((struct efs_dextent *)bp->b_data + index,
&ex2);
brelse(bp);
offset = ex2.ex_offset * EFS_BB_SIZE;
length = ex2.ex_length * EFS_BB_SIZE;
/* found: the hint lies inside [offset, offset + length) */
if (start_hint >= offset && start_hint < (offset + length))
break;
if (start_hint < offset)
hi = mid - 1;
else
lo = mid + 1;
}
/*
* This is bad. Either the hint is bogus (which shouldn't
* happen) or the extent list must be screwed up. We
* have to abort.
*/
if (lo > hi) {
EFS_DPRINTF(("efs_extent_iterator_init: bsearch "
"failed to find extent\n"));
return;
}
/*
* Position the iterator: "next + mid" is the logical extent index,
* indir selects the indirect extent and mid is the slot inside it.
*/
exi->exi_next = next + mid;
exi->exi_dnext = indir;
exi->exi_innext = mid;
}
/*
@ -431,7 +574,11 @@ int
efs_extent_iterator_next(struct efs_extent_iterator *exi,
struct efs_extent *exp)
{
struct efs_extent ex;
struct efs_dextent *dexp;
struct efs_inode *eip = exi->exi_eip;
struct buf *bp;
int err, bboff, index;
if (exi->exi_next++ >= eip->ei_numextents)
return (-1);
@ -439,66 +586,37 @@ efs_extent_iterator_next(struct efs_extent_iterator *exi,
/* direct or indirect extents? */
if (eip->ei_numextents <= EFS_DIRECTEXTENTS) {
if (exp != NULL) {
efs_dextent_to_extent(
&eip->ei_di.di_extents[exi->exi_dnext++], exp);
dexp = &eip->ei_di.di_extents[exi->exi_dnext++];
efs_dextent_to_extent(dexp, exp);
}
} else {
/*
* Cache a full indirect extent worth of extent descriptors.
* This is maximally 124KB (248 * 512).
*/
if (exi->exi_incache == NULL) {
struct efs_extent ex;
struct buf *bp;
int err;
efs_dextent_to_extent(
&eip->ei_di.di_extents[exi->exi_dnext], &ex);
efs_dextent_to_extent(
&eip->ei_di.di_extents[exi->exi_dnext], &ex);
bboff = exi->exi_innext / EFS_EXTENTS_PER_BB;
index = exi->exi_innext % EFS_EXTENTS_PER_BB;
err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount),
ex.ex_bn, ex.ex_length, NULL, &bp);
if (err) {
EFS_DPRINTF(("efs_extent_iterator_next: "
"efs_bread failed: %d\n", err));
brelse(bp);
return (err);
}
exi->exi_incache = malloc(ex.ex_length * EFS_BB_SIZE,
M_EFSTMP, M_WAITOK);
exi->exi_nincache = ex.ex_length * EFS_BB_SIZE /
sizeof(struct efs_dextent);
memcpy(exi->exi_incache, bp->b_data,
ex.ex_length * EFS_BB_SIZE);
err = efs_bread(VFSTOEFS(eip->ei_vp->v_mount),
ex.ex_bn + bboff, NULL, &bp);
if (err) {
EFS_DPRINTF(("efs_extent_iterator_next: "
"efs_bread failed: %d\n", err));
brelse(bp);
return (err);
}
if (exp != NULL) {
efs_dextent_to_extent(
&exi->exi_incache[exi->exi_innext++], exp);
dexp = (struct efs_dextent *)bp->b_data + index;
efs_dextent_to_extent(dexp, exp);
}
brelse(bp);
/* if this is the last one, ditch the cache */
if (exi->exi_innext >= exi->exi_nincache) {
bboff = exi->exi_innext++ / EFS_EXTENTS_PER_BB;
if (bboff >= ex.ex_length) {
exi->exi_innext = 0;
exi->exi_nincache = 0;
free(exi->exi_incache, M_EFSTMP);
exi->exi_incache = NULL;
exi->exi_dnext++;
}
}
return (0);
}
/*
* Clean up the extent iterator.
*
* Frees the indirect extent cache if one is attached, then resets the
* iterator by re-initialising it with a NULL inode.
*
* NOTE(review): this is pre-change code in a flattened diff. It relies on
* the exi_incache field and on the old two-argument form of
* efs_extent_iterator_init(); the callers shown elsewhere in this diff drop
* their calls to it, so it appears to be deleted by this commit. Confirm
* against the repository.
*/
void
efs_extent_iterator_free(struct efs_extent_iterator *exi)
{
if (exi->exi_incache != NULL)
free(exi->exi_incache, M_EFSTMP);
efs_extent_iterator_init(exi, NULL);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: efs_subr.h,v 1.1 2007/06/29 23:30:30 rumble Exp $ */
/* $NetBSD: efs_subr.h,v 1.2 2007/07/04 19:24:09 rumble Exp $ */
/*
* Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
@ -25,9 +25,7 @@ struct efs_extent_iterator {
struct efs_inode *exi_eip;
off_t exi_next; /* next logical extent*/
off_t exi_dnext; /* next direct extent */
off_t exi_innext; /* next indirect ext */
struct efs_dextent *exi_incache; /* indirect ext cache */
size_t exi_nincache; /* size of incache */
off_t exi_innext; /* next indirect ext. */
};
int32_t efs_sb_checksum(struct efs_sb *, int);
@ -39,14 +37,12 @@ void efs_dextent_to_extent(struct efs_dextent *, struct efs_extent *);
void efs_extent_to_dextent(struct efs_extent *, struct efs_dextent *);
int efs_inode_lookup(struct efs_mount *, struct efs_inode *,
struct componentname *, ino_t *);
int efs_bread(struct efs_mount *, uint32_t, int, struct lwp *,
struct buf **);
int efs_bread(struct efs_mount *, uint32_t, struct lwp *, struct buf **);
void efs_sync_inode_to_dinode(struct efs_inode *);
void efs_sync_dinode_to_inode(struct efs_inode *);
void efs_extent_iterator_init(struct efs_extent_iterator *,
struct efs_inode *);
struct efs_inode *, off_t);
int efs_extent_iterator_next(struct efs_extent_iterator *,
struct efs_extent *);
void efs_extent_iterator_free(struct efs_extent_iterator *);
#endif /* !_FS_EFS_EFS_SUBR_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: efs_vfsops.c,v 1.2 2007/07/01 01:09:05 rumble Exp $ */
/* $NetBSD: efs_vfsops.c,v 1.3 2007/07/04 19:24:09 rumble Exp $ */
/*
* Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
@ -17,7 +17,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: efs_vfsops.c,v 1.2 2007/07/01 01:09:05 rumble Exp $");
__KERNEL_RCSID(0, "$NetBSD: efs_vfsops.c,v 1.3 2007/07/04 19:24:09 rumble Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -71,7 +71,7 @@ efs_mount_common(struct mount *mp, const char *path, struct vnode *devvp,
emp->em_mnt = mp;
/* read in the superblock */
err = efs_bread(emp, EFS_BB_SB, EFS_BY2BB(EFS_SB_SIZE), l, &bp);
err = efs_bread(emp, EFS_BB_SB, l, &bp);
if (err) {
EFS_DPRINTF(("superblock read failed\n"));
free(emp, M_EFSMNT);
@ -102,8 +102,7 @@ efs_mount_common(struct mount *mp, const char *path, struct vnode *devvp,
struct buf *rbp;
bool skip = false;
err = efs_bread(emp, be32toh(emp->em_sb.sb_replsb),
EFS_BY2BB(EFS_SB_SIZE), l, &rbp);
err = efs_bread(emp, be32toh(emp->em_sb.sb_replsb), l, &rbp);
if (err) {
printf("efs: read of superblock replicant failed; "
"please run fsck_efs(8)\n");
@ -132,7 +131,7 @@ efs_mount_common(struct mount *mp, const char *path, struct vnode *devvp,
}
/* ensure we can read last block */
err = efs_bread(emp, be32toh(emp->em_sb.sb_size) - 1, 1, l, &bp);
err = efs_bread(emp, be32toh(emp->em_sb.sb_size) - 1, l, &bp);
if (err) {
printf("efs: cannot access all filesystem blocks; please run "
"fsck_efs(8)\n");

View File

@ -1,4 +1,4 @@
/* $NetBSD: efs_vnops.c,v 1.2 2007/07/04 18:40:18 rumble Exp $ */
/* $NetBSD: efs_vnops.c,v 1.3 2007/07/04 19:24:09 rumble Exp $ */
/*
* Copyright (c) 2006 Stephen M. Rumble <rumble@ephemeral.org>
@ -17,7 +17,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: efs_vnops.c,v 1.2 2007/07/04 18:40:18 rumble Exp $");
__KERNEL_RCSID(0, "$NetBSD: efs_vnops.c,v 1.3 2007/07/04 19:24:09 rumble Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -246,7 +246,7 @@ efs_read(void *v)
if (ap->a_vp->v_type != VREG)
return (EINVAL);
efs_extent_iterator_init(&exi, eip);
efs_extent_iterator_init(&exi, eip, uio->uio_offset);
ret = efs_extent_iterator_next(&exi, &ex);
while (ret == 0) {
if (uio->uio_offset < 0 || uio->uio_offset >= eip->ei_size ||
@ -277,11 +277,9 @@ efs_read(void *v)
if (err) {
EFS_DPRINTF(("efs_read: uiomove error %d\n",
err));
efs_extent_iterator_free(&exi);
return (err);
}
}
efs_extent_iterator_free(&exi);
return ((ret == -1) ? 0 : ret);
}
@ -317,21 +315,21 @@ efs_readdir(void *v)
offset = 0;
efs_extent_iterator_init(&exi, ei);
efs_extent_iterator_init(&exi, ei, 0);
while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
err = efs_bread(VFSTOEFS(ap->a_vp->v_mount),
ex.ex_bn, ex.ex_length, NULL, &bp);
if (err) {
efs_extent_iterator_free(&exi);
brelse(bp);
return (err);
}
for (i = 0; i < ex.ex_length; i++) {
db = ((struct efs_dirblk *)bp->b_data) + i;
err = efs_bread(VFSTOEFS(ap->a_vp->v_mount),
ex.ex_bn + i, NULL, &bp);
if (err) {
brelse(bp);
return (err);
}
db = (struct efs_dirblk *)bp->b_data;
if (be16toh(db->db_magic) != EFS_DIRBLK_MAGIC) {
printf("efs_readdir: bad dirblk\n");
brelse(bp);
continue;
}
@ -356,13 +354,11 @@ efs_readdir(void *v)
if (offset > uio->uio_offset) {
/* XXX - shouldn't happen, right? */
brelse(bp);
efs_extent_iterator_free(&exi);
return (0);
}
if (s > uio->uio_resid) {
brelse(bp);
efs_extent_iterator_free(&exi);
return (0);
}
@ -381,7 +377,6 @@ efs_readdir(void *v)
if (err) {
brelse(bp);
free(dp, M_EFSTMP);
efs_extent_iterator_free(&exi);
return (err);
}
@ -416,17 +411,15 @@ efs_readdir(void *v)
free(dp, M_EFSTMP);
if (err) {
brelse(bp);
efs_extent_iterator_free(&exi);
return (err);
}
offset += s;
}
}
brelse(bp);
brelse(bp);
}
}
efs_extent_iterator_free(&exi);
if (ret != -1)
return (ret);
@ -450,7 +443,7 @@ efs_readlink(void *v)
struct efs_inode *eip = EFS_VTOI(ap->a_vp);
char *buf;
size_t len;
int err;
int err, i;
if ((eip->ei_mode & EFS_IFMT) != EFS_IFLNK)
return (EINVAL);
@ -470,39 +463,55 @@ efs_readlink(void *v)
memcpy(buf, eip->ei_di.di_symlink, eip->ei_size);
len = MIN(uio->uio_resid, eip->ei_size + 1);
buf[len - 1] = '\0';
err = uiomove(buf, len, uio);
free(buf, M_EFSTMP);
if (err)
return (err);
} else {
struct efs_extent_iterator exi;
struct efs_extent ex;
struct efs_dextent *dexp;
struct buf *bp;
int resid, off, ret;
if (eip->ei_numextents > 1) {
printf("efs_readlink: lazy\n");
free(buf, M_EFSTMP);
return (EBADF);
off = 0;
resid = eip->ei_size;
efs_extent_iterator_init(&exi, eip, 0);
while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
for (i = 0; i < ex.ex_length; i++) {
err = efs_bread(VFSTOEFS(ap->a_vp->v_mount),
ex.ex_bn + i, NULL, &bp);
if (err) {
brelse(bp);
free(buf, M_EFSTMP);
return (err);
}
len = MIN(resid, bp->b_bcount);
memcpy(buf + off, bp->b_data, len);
brelse(bp);
off += len;
resid -= len;
if (resid == 0)
break;
}
if (resid == 0)
break;
}
dexp = &eip->ei_di.di_extents[0];
efs_dextent_to_extent(dexp, &ex);
if (ret != 0 && ret != -1) {
free(buf, M_EFSTMP);
return (ret);
}
err = efs_bread(VFSTOEFS(ap->a_vp->v_mount), ex.ex_bn,
ex.ex_length, NULL, &bp);
len = MIN(eip->ei_size, ex.ex_length * EFS_BB_SIZE);
memcpy(buf, bp->b_data, len);
brelse(bp);
buf[len] = '\0';
err = uiomove(buf, len + 1, uio);
free(buf, M_EFSTMP);
if (err)
return (err);
len = off + 1;
}
return (0);
KASSERT(len >= 1 && len <= (eip->ei_size + 1));
buf[len - 1] = '\0';
err = uiomove(buf, len, uio);
free(buf, M_EFSTMP);
return (err);
}
/*
@ -571,7 +580,7 @@ efs_bmap(void *v)
*ap->a_vpp = VFSTOEFS(vp->v_mount)->em_devvp;
found = false;
efs_extent_iterator_init(&exi, eip);
efs_extent_iterator_init(&exi, eip, ap->a_bn * EFS_BB_SIZE);
while ((ret = efs_extent_iterator_next(&exi, &ex)) == 0) {
if (ap->a_bn >= ex.ex_offset &&
ap->a_bn < (ex.ex_offset + ex.ex_length)) {
@ -579,7 +588,6 @@ efs_bmap(void *v)
break;
}
}
efs_extent_iterator_free(&exi);
KASSERT(!found || (found && ret == 0));