/* $NetBSD: lfs_inode.c,v 1.26 1999/06/15 22:25:41 perseant Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Konrad E. Schroder . * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the NetBSD * Foundation, Inc. and its contributors. * 4. Neither the name of The NetBSD Foundation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /* * Copyright (c) 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)lfs_inode.c 8.9 (Berkeley) 5/8/95 */

#if defined(_KERNEL) && !defined(_LKM)
#include "opt_quota.h"
#endif

/*
 * NOTE(review): the sixteen #include directives below carry no header
 * name in this copy of the file, so it cannot compile as-is.  Presumably
 * the <...> operands were stripped by whatever tool flattened the file;
 * restore them from the upstream revision before building.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Local helper used by lfs_truncate() for partial truncations. */
static int lfs_vinvalbuf __P((struct vnode *, struct ucred *, struct proc *, ufs_daddr_t));

/*
 * Search a block for a specific dinode.
 *
 * fs  - file system; INOPB(fs) gives the number of dinodes examined.
 * ino - inode number to look for.
 * dip - first dinode of the block to search.
 *
 * The scan starts at the last of the INOPB(fs) entries and walks
 * backwards toward dip.  Returns a pointer to the first entry found whose
 * di_inumber equals ino; if no entry matches, panic() is called and the
 * function has no return statement after it.
 */
struct dinode *
lfs_ifind(fs, ino, dip)
	struct lfs *fs;
	ino_t ino;
	register struct dinode *dip;
{
	register int cnt;
	register struct dinode *ldip;

	/* cnt counts the remaining entries; ldip walks from the end to dip. */
	for (cnt = INOPB(fs), ldip = dip + (cnt - 1); cnt--; --ldip)
		if (ldip->di_inumber == ino)
			return (ldip);

	panic("lfs_ifind: dinode %u not found", ino);
	/* NOTREACHED */
}

/*
 * Update the times on an inode and, if asked for a synchronous update,
 * flush the vnode.
 *
 * v is a struct vop_update_args *:
 *	a_vp      - vnode whose inode is updated.
 *	a_access  - access time to apply, or NULL to use the current time.
 *	a_modify  - modification time to apply, or NULL to use the current
 *		    time.
 *	a_waitfor - flags; only LFS_SYNC is examined here.
 *
 * Returns 0 when the mount is read-only, when the inode ends up with
 * neither IN_MODIFIED nor IN_CLEANING set, or when LFS_SYNC was not
 * requested; otherwise returns the result of lfs_vflush().
 */
int
lfs_update(v)
	void *v;
{
	struct vop_update_args /* {
		struct vnode *a_vp;
		struct timespec *a_access;
		struct timespec *a_modify;
		int a_waitfor;
	} */ *ap = v;
	struct inode *ip;
	struct vnode *vp = ap->a_vp;
	int mod, oflag;
	struct timespec ts;
	struct lfs *fs = VFSTOUFS(vp->v_mount)->um_lfs;

	/* Nothing to record on a read-only mount. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	ip = VTOI(vp);

	/*
	 * If we are called from vinvalbuf, and the file's blocks have
	 * already been scheduled for writing, but the writes have not
	 * yet completed, lfs_vflush will not be called, and vinvalbuf
	 * will cause a panic.  So, we must wait until any pending write
	 * for our inode completes, if we are called with LFS_SYNC set.
	 *
	 * The return value of tsleep() is ignored; the loop simply
	 * re-tests WRITEINPROG(vp) after every wakeup.
	 */
	while((ap->a_waitfor & LFS_SYNC) && WRITEINPROG(vp)) {
#ifdef DEBUG_LFS
		printf("lfs_update: sleeping on inode %d (in-progress)\n",ip->i_number);
#endif
		tsleep(vp, (PRIBIO+1), "lfs_update", 0);
	}

	/*
	 * Remember whether the inode was already IN_MODIFIED so that
	 * lfs_uinodes is only bumped on a clean -> modified transition.
	 * NOTE(review): oflag is assigned here but never read afterwards
	 * in this function.
	 */
	mod = ip->i_flag & IN_MODIFIED;
	oflag = ip->i_flag;

	/* ts is derived from the global `time'; it backs any NULL argument. */
	TIMEVAL_TO_TIMESPEC(&time, &ts);
	LFS_ITIMES(ip, ap->a_access ? ap->a_access : &ts,
	    ap->a_modify ? ap->a_modify : &ts, &ts);
	if (!mod && (ip->i_flag & IN_MODIFIED))
		ip->i_lfs->lfs_uinodes++;

	/* Neither modified nor being cleaned: nothing further to do. */
	if ((ip->i_flag & (IN_MODIFIED|IN_CLEANING)) == 0) {
		return (0);
	}

	/* If sync, push back the vnode and any dirty blocks it may have. */
	if(ap->a_waitfor & LFS_SYNC) {
		/*
		 * Avoid flushing VDIROP.  lfs_diropwait is held incremented
		 * for as long as we wait for the flag to clear.
		 */
		++fs->lfs_diropwait;
		while(vp->v_flag & VDIROP) {
#ifdef DEBUG_LFS
			printf("lfs_update: sleeping on inode %d (dirops)\n",ip->i_number);
#endif
			/*
			 * With no dirops active, flush the file system
			 * ourselves; otherwise sleep on lfs_writer.
			 */
			if(fs->lfs_dirops==0)
				lfs_flush_fs(vp->v_mount,0);
			else
				tsleep(&fs->lfs_writer, PRIBIO+1, "lfs_fsync", 0);
			/* XXX KS - by falling out here, are we writing the vn twice? */
		}
		--fs->lfs_diropwait;
		return lfs_vflush(vp);
	}
	return 0;
}

/*
 * Update segment usage information when removing a block.
 *
 * Statement macro for use inside lfs_truncate(); it reads and writes that
 * function's locals lastseg, sup, sup_bp, fs, num, e1 and fragsreleased.
 * If a segment is being tracked (lastseg != -1) it fetches that segment's
 * usage entry, subtracts the num accumulated bytes from su_nbytes, writes
 * the entry back and adds the freed fragments to fragsreleased.
 *
 * NOTE(review): the result of VOP_BWRITE() is only stored in e1; nothing
 * in the macro checks it.  The "su_nbytes = 0" after panic() is only
 * reached if panic() returns.  The expansion is a bare if-statement (no
 * do { } while (0) wrapper), so it must be used as a full statement.
 */
#define UPDATE_SEGUSE \
	if (lastseg != -1) { \
		LFS_SEGENTRY(sup, fs, lastseg, sup_bp); \
		if (num > sup->su_nbytes) { \
			printf("lfs_truncate: negative bytes: segment %d short by %d\n", \
			    lastseg, num - sup->su_nbytes); \
			panic("lfs_truncate: negative bytes"); \
			sup->su_nbytes = 0; \
		} else \
			sup->su_nbytes -= num; \
		e1 = VOP_BWRITE(sup_bp); \
		fragsreleased += numfrags(fs, num); \
	}

/*
 * Account for S bytes freed at disk address daddr (a local of the
 * expanding function).  A zero daddr is ignored.  Bytes are accumulated
 * in num for as long as consecutive addresses fall in the same segment;
 * when the segment changes, the previous total is flushed through
 * UPDATE_SEGUSE and accumulation restarts for the new segment.
 */
#define SEGDEC(S) { \
	if (daddr != 0) { \
		if (lastseg != (seg = datosn(fs, daddr))) { \
			UPDATE_SEGUSE; \
			num = (S); \
			lastseg = seg; \
		} else \
			num += (S); \
	} \
}

/* * Truncate the inode ip to at most length size. Update segment usage * table information. 
*/
/*
 * v is a struct vop_truncate_args *:
 *	a_vp     - vnode to truncate.
 *	a_length - new length in bytes.
 *	a_flags  - not examined in this function.
 *	a_cred, a_p - passed on to the buffer invalidation routines.
 *
 * Outline of the body:
 *   1. A symlink stored in the inode (mnt_maxsymlinklen > 0) is zeroed,
 *      its size set to 0, and the inode updated.
 *   2. If the file is not longer than a_length, only the change/update
 *      flags are set and the inode updated.
 *   3. Sleep until fs->lfs_seglock is clear.
 *   4. If the new length is not block aligned, zero the tail of the new
 *      last block, resize its buffer and write it.
 *   5. Walk logical blocks from the old last block down to lastblock,
 *      charging each freed address to its segment (SEGDEC) and clearing
 *      the direct / indirect pointers.
 *   6. When truncating to zero, bump the ifile entry's version.
 *   7. Adjust i_ffs_blocks, lfs_bfree and lfs_avail, invalidate the
 *      buffers past the new end and update the inode.
 *
 * Returns the first error hit by getinoquota(), bread() or VOP_BWRITE()
 * in steps 4-5; otherwise the buffer-invalidation error if there is one,
 * else the result of the final VOP_UPDATE().
 *
 * NOTE(review): the early "return (e1)" paths in steps 4 and 5 leave the
 * function before the segment-usage and block-count bookkeeping of the
 * later steps runs; confirm callers can cope with that partial state.
 */
/* ARGSUSED */
int
lfs_truncate(v)
	void *v;
{
	struct vop_truncate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		int a_flags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap = v;
	register struct indir *inp;
	register int i;
	register ufs_daddr_t *daddrp;
	register struct vnode *vp = ap->a_vp;
	off_t length = ap->a_length;
	struct buf *bp, *sup_bp;
	struct ifile *ifp;
	struct inode *ip;
	struct lfs *fs;
	struct indir a[NIADDR + 2], a_end[NIADDR + 2];
	SEGUSE *sup;
	ufs_daddr_t daddr, lastblock, lbn, olastblock;
	ufs_daddr_t oldsize_lastblock, oldsize_newlast, newsize;
	long off, a_released, fragsreleased, i_released;
	int e1, e2, depth, lastseg, num, offset, seg, freesize;

	ip = VTOI(vp);

	/* Symlink whose target is stored directly in the inode. */
	if (vp->v_type == VLNK && vp->v_mount->mnt_maxsymlinklen > 0) {
#ifdef DIAGNOSTIC
		if (length != 0)
			panic("lfs_truncate: partial truncate of symlink");
#endif
		bzero((char *)&ip->i_ffs_shortlink, (u_int)ip->i_ffs_size);
		ip->i_ffs_size = 0;
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (VOP_UPDATE(vp, NULL, NULL, 0));
	}
	uvm_vnp_setsize(vp, length);

	fs = ip->i_lfs;
	lfs_imtime(fs);

	/* If length is larger than the file, just update the times. */
	if (ip->i_ffs_size <= length) {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (VOP_UPDATE(vp, NULL, NULL, 0));
	}

	/*
	 * Make sure no writes happen while we're truncating.
	 * Otherwise, blocks which are accounted for on the inode
	 * *and* which have been created for cleaning can coexist,
	 * and cause us to overcount, and panic below.
	 *
	 * XXX KS - too restrictive?  Maybe only when cleaning?
	 */
	while(fs->lfs_seglock) {
		tsleep(&fs->lfs_seglock, (PRIBIO+1), "lfs_truncate", 0);
	}

	/*
	 * Calculate the range of logical blocks affected.  The loop further
	 * down frees every lbn from olastblock down to and including
	 * lastblock, so lastblock is the lowest block that goes away (0 when
	 * the file is truncated to 0) and olastblock is the highest block the
	 * file currently has.
	 */
	lastblock = lblkno(fs, length + fs->lfs_bsize - 1);
	olastblock = lblkno(fs, ip->i_ffs_size + fs->lfs_bsize - 1) - 1;

	/*
	 * Update the size of the file.  If the file is not being truncated to
	 * a block boundary, the contents of the partial block following the
	 * end must be zeroed in case it ever becomes accessible again
	 * because of subsequent file growth.  For this part of the code,
	 * oldsize_newlast refers to the old size of the new last block in the
	 * file.
	 */
	offset = blkoff(fs, length);
	lbn = lblkno(fs, length);
	oldsize_newlast = blksize(fs, ip, lbn);

	/* Now set oldsize to the current size of the current last block */
	oldsize_lastblock = blksize(fs, ip, olastblock);
	if (offset == 0)
		ip->i_ffs_size = length;
	else {
#ifdef QUOTA
		if ((e1 = getinoquota(ip)) != 0)
			return (e1);
#endif
		/* On a failed read the buffer is still released before returning. */
		if ((e1 = bread(vp, lbn, oldsize_newlast, NOCRED, &bp)) != 0) {
			printf("lfs_truncate: bread: %d\n",e1);
			brelse(bp);
			return (e1);
		}
		ip->i_ffs_size = length;
		(void)uvm_vnp_uncache(vp);
		/* blksize() is re-evaluated now that i_ffs_size has changed. */
		newsize = blksize(fs, ip, lbn);
		bzero((char *)bp->b_data + offset, (u_int)(newsize - offset));
		/*
		 * With DEBUG defined, allocbuf() becomes the else-branch of
		 * the B_CALL check; without it, allocbuf() runs
		 * unconditionally.
		 */
#ifdef DEBUG
		if(bp->b_flags & B_CALL)
			panic("Can't allocbuf malloced buffer!");
		else
#endif
			allocbuf(bp, newsize);
		/* The new last block shrank: give back the difference. */
		if(oldsize_newlast > newsize)
			ip->i_ffs_blocks -= btodb(oldsize_newlast - newsize);
		if ((e1 = VOP_BWRITE(bp)) != 0) {
			printf("lfs_truncate: bwrite: %d\n",e1);
			return (e1);
		}
	}

	/*
	 * Modify sup->su_nbyte counters for each deleted block; keep track
	 * of number of blocks removed for ip->i_ffs_blocks.
	 */
	fragsreleased = 0;
	num = 0;
	lastseg = -1;

	for (lbn = olastblock; lbn >= lastblock;) {
		/* XXX use run length from bmap array to make this faster */
		ufs_bmaparray(vp, lbn, &daddr, a, &depth, NULL);
		if (lbn == olastblock) {
			/*
			 * First pass: remember the indirection path of the
			 * old last block; only that block uses its own
			 * (possibly partial) size as freesize.
			 */
			for (i = NIADDR + 2; i--;)
				a_end[i] = a[i];
			freesize = oldsize_lastblock;
		} else
			freesize = fs->lfs_bsize;

		switch (depth) {
		case 0: /* Direct block. */
			daddr = ip->i_ffs_db[lbn];
			SEGDEC(freesize);
			ip->i_ffs_db[lbn] = 0;
			--lbn;
			break;
#ifdef DIAGNOSTIC
		case 1: /* An indirect block. */
			panic("lfs_truncate: ufs_bmaparray returned depth 1");
			/* NOTREACHED */
#endif
		default: /* Chain of indirect blocks. */
			inp = a + --depth;
			/*
			 * Not at the start of this indirect block and not yet
			 * at lastblock: step lbn back to whichever comes
			 * first and look it up again.
			 */
			if (inp->in_off > 0 && lbn != lastblock) {
				lbn -= inp->in_off < lbn - lastblock ?
				    inp->in_off : lbn - lastblock;
				break;
			}
			/*
			 * Walk up the indirection chain for as long as the
			 * level is being emptied from its first slot, or we
			 * have reached lastblock.
			 */
			for (; depth && (inp->in_off == 0 || lbn == lastblock);
			    --inp, --depth) {
				if (bread(vp, inp->in_lbn, fs->lfs_bsize, NOCRED, &bp))
					panic("lfs_truncate: bread bno %d", inp->in_lbn);
				/* Charge every address from in_off to the recorded end. */
				daddrp = (ufs_daddr_t *)bp->b_data + inp->in_off;
				for (i = inp->in_off; i++ <= a_end[depth].in_off;) {
					daddr = *daddrp++;
					SEGDEC(freesize);
				}
				a_end[depth].in_off = NINDIR(fs) - 1;
				/*
				 * A block emptied from slot 0 is just
				 * released; otherwise its tail is zeroed and
				 * the block is written back.
				 */
				if (inp->in_off == 0)
					brelse (bp);
				else {
					bzero((ufs_daddr_t *)bp->b_data + inp->in_off,
					    fs->lfs_bsize - inp->in_off * sizeof(ufs_daddr_t));
					if ((e1 = VOP_BWRITE(bp)) != 0) {
						printf("lfs_truncate: indir bwrite: %d\n",e1);
						return (e1);
					}
				}
			}
			/* The whole chain was emptied: drop the inode's pointer too. */
			if (depth == 0 && a[1].in_off == 0) {
				off = a[0].in_off;
				daddr = ip->i_ffs_ib[off];
				SEGDEC(freesize);
				ip->i_ffs_ib[off] = 0;
			}
			if (lbn == lastblock || lbn <= NDADDR)
				--lbn;
			else {
				lbn -= NINDIR(fs);
				if (lbn < lastblock)
					lbn = lastblock;
			}
		}
	}
	/*
	 * Flush the byte count still pending for the last segment touched.
	 * NOTE(review): the e1 assigned inside UPDATE_SEGUSE is overwritten
	 * further down without ever being examined.
	 */
	UPDATE_SEGUSE;

	/* If truncating the file to 0, update the version number. */
	if (length == 0) {
		LFS_IENTRY(ifp, fs, ip->i_number, bp);
		++ifp->if_version;
		(void) VOP_BWRITE(bp);
	}
#ifdef DIAGNOSTIC
	if (ip->i_ffs_blocks < fragstodb(fs, fragsreleased)) {
		panic("lfs_truncate: frag count < 0 (%d<%ld), ino %d\n",
		    ip->i_ffs_blocks, fragstodb(fs, fragsreleased),
		    ip->i_number);
		/* Only reached if panic() returns. */
		fragsreleased = dbtofrags(fs, ip->i_ffs_blocks);
	}
#endif
	ip->i_ffs_blocks -= fragstodb(fs, fragsreleased);
	fs->lfs_bfree += fragstodb(fs, fragsreleased);
	ip->i_flag |= IN_CHANGE | IN_UPDATE;

	/*
	 * Traverse dirty block list counting number of dirty buffers
	 * that are being deleted out of the cache, so that the lfs_avail
	 * field can be updated.
	 */
	a_released = 0;
	i_released = 0;
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = bp->b_vnbufs.le_next) {
		/*
		 * XXX KS - Don't miscount if we're not truncating to zero.
		 *
		 * NOTE(review): this test keeps only lblkno > lastblock (or
		 * < -lastblock-NIADDR), whereas the freeing loop above
		 * includes lbn == lastblock and lfs_vinvalbuf() below is
		 * handed lastblock-1 and tests against -maxblk-(NIADDR-1).
		 * Confirm that these bounds are meant to differ.
		 */
		if(length>0 && !(bp->b_lblkno >= 0 && bp->b_lblkno > lastblock)
		    && !(bp->b_lblkno < 0 && bp->b_lblkno < -lastblock-NIADDR))
			continue;

		if (bp->b_flags & B_LOCKED) {
			a_released += numfrags(fs, bp->b_bcount);
			/*
			 * XXX
			 * When buffers are created in the cache, their block
			 * number is set equal to their logical block number.
			 * If that is still true, we are assuming that the
			 * blocks are new (not yet on disk) and weren't
			 * counted above.  However, there is a slight chance
			 * that a block's disk address is equal to its logical
			 * block number in which case, we'll get an overcounting
			 * here.
			 */
			if (bp->b_blkno == bp->b_lblkno) {
				i_released += numfrags(fs, bp->b_bcount);
			}
		}
	}
	/* Second adjustment: fragments held only by never-written buffers. */
	fragsreleased = i_released;
#ifdef DIAGNOSTIC
	if (fragsreleased > dbtofrags(fs, ip->i_ffs_blocks)) {
		printf("lfs_inode: %ld frags released > %d in inode %d\n",
		    fragsreleased, dbtofrags(fs, ip->i_ffs_blocks),
		    ip->i_number);
		fragsreleased = dbtofrags(fs, ip->i_ffs_blocks);
	}
#endif
	fs->lfs_bfree += fragstodb(fs, fragsreleased);
	ip->i_ffs_blocks -= fragstodb(fs, fragsreleased);
#ifdef DIAGNOSTIC
	if (length == 0 && ip->i_ffs_blocks != 0) {
		printf("lfs_inode: trunc to zero, but %d blocks left on inode %d\n",
		    ip->i_ffs_blocks, ip->i_number);
		panic("lfs_inode\n");
	}
#endif
	fs->lfs_avail += fragstodb(fs, a_released);

	/*
	 * A partial truncate only drops the buffers past the new end; a
	 * truncate to zero drops everything through vinvalbuf().
	 */
	if(length>0)
		e1 = lfs_vinvalbuf(vp, ap->a_cred, ap->a_p, lastblock-1);
	else
		e1 = vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 0, 0);
	/* The inode is updated even when the invalidation failed. */
	e2 = VOP_UPDATE(vp, NULL, NULL, 0);
	if(e1)
		printf("lfs_truncate: vinvalbuf: %d\n",e1);
	if(e2)
		printf("lfs_truncate: update: %d\n",e2);

	/* The invalidation error takes precedence over the update error. */
	return (e1 ? e1 : e2 ? e2 : 0);
}

/* * Get rid of blocks a la vinvalbuf; but only blocks that are of a higher * lblkno than the file size allows. 
*/
/*
 * vp     - vnode whose clean and dirty buffer lists are scanned.
 * cred   - not referenced in the body.
 * p      - not referenced in the body.
 * maxblk - highest logical block number to keep.
 *
 * A buffer is invalidated when its b_lblkno is non-negative and greater
 * than maxblk, or negative and less than -maxblk-(NIADDR-1).  Every other
 * buffer is left in place.  The scan restarts from the top after any
 * sleep and after any pass that invalidated at least one buffer; it ends
 * with a pass that invalidates nothing.
 *
 * Returns 0 on success, or the non-zero value returned by tsleep().
 *
 * NOTE(review): the prototype earlier in the file declares this function
 * static, but the definition below omits the keyword.
 */
int
lfs_vinvalbuf(vp, cred, p, maxblk)
	register struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
	ufs_daddr_t maxblk;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int i, s, error, dirty;

top:
	/* Despite its name, dirty counts buffers invalidated from either list. */
	dirty=0;
	for (i=0;i<2;i++) {
		/* Pass 0 walks the clean list, pass 1 the dirty list. */
		if(i==0)
			blist = vp->v_cleanblkhd.lh_first;
		else /* i == 1 */
			blist = vp->v_dirtyblkhd.lh_first;

		for (bp = blist; bp; bp = nbp) {
			/* Fetch the successor first: bp may be released below. */
			nbp = bp->b_vnbufs.le_next;

			s = splbio();
			/*
			 * A B_GATHERED buffer is waited for by sleeping on
			 * the vnode, after which the scan starts over.
			 */
			if (bp->b_flags & B_GATHERED) {
				error = tsleep(vp, PRIBIO+1, "lfs_vin2", 0);
				splx(s);
				if(error)
					return error;
				goto top;
			}
			/*
			 * Someone else holds the buffer: ask for a wakeup,
			 * sleep on it, then start over.
			 */
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp, (PRIBIO + 1), "lfs_vinval", 0);
				splx(s);
				if (error)
					return (error);
				goto top;
			}

			/* Claim the buffer while still at splbio. */
			bp->b_flags |= B_BUSY;
			splx(s);
			if((bp->b_lblkno >= 0 && bp->b_lblkno > maxblk)
			    || (bp->b_lblkno < 0 && bp->b_lblkno < -maxblk-(NIADDR-1)))
			{
				/*
				 * Past the new end of the file: invalidate.
				 * B_CALL buffers go through lfs_freebuf(),
				 * all others through brelse().
				 *
				 * NOTE(review): nothing here takes the
				 * buffer off its free list before brelse()
				 * (see the comment in the else branch);
				 * confirm that is safe.
				 */
				bp->b_flags |= B_INVAL | B_VFLUSH;
				if(bp->b_flags & B_CALL) {
					lfs_freebuf(bp);
				} else {
					brelse(bp);
				}
				++dirty;
			} else {
				/*
				 * This buffer is still on its free list.
				 * So don't brelse, but wake up any sleepers.
				 */
				bp->b_flags &= ~B_BUSY;
				if(bp->b_flags & B_WANTED) {
					bp->b_flags &= ~(B_WANTED|B_AGE);
					wakeup(bp);
				}
			}
		}
	}
	/* Rescan until a complete pass invalidates nothing. */
	if(dirty)
		goto top;

	return (0);
}