From f8b02b9cdf5fefbdbb98ee4553858ca46388ee71 Mon Sep 17 00:00:00 2001 From: tsutsui Date: Sat, 17 Nov 2007 16:50:25 +0000 Subject: [PATCH] Add a dumb but BSD-licensed newfs_ext2fs(8) command. This is based on the newfs(8) command for ffs, but most ext2fs specific part is written from scratch. Tested on alpha, cobalt, i386 and sgimips with some disks and vnd(4), and at least cobalt's firmware can load boot files from an ext2fs E2FS_REV0 partition created by this native newfs_ext2fs(8) command. Closes PR bin/16175. --- sbin/newfs_ext2fs/Makefile | 16 + sbin/newfs_ext2fs/extern.h | 45 + sbin/newfs_ext2fs/mke2fs.c | 1422 ++++++++++++++++++++++++++++++ sbin/newfs_ext2fs/newfs_ext2fs.8 | 321 +++++++ sbin/newfs_ext2fs/newfs_ext2fs.c | 477 ++++++++++ 5 files changed, 2281 insertions(+) create mode 100644 sbin/newfs_ext2fs/Makefile create mode 100644 sbin/newfs_ext2fs/extern.h create mode 100644 sbin/newfs_ext2fs/mke2fs.c create mode 100644 sbin/newfs_ext2fs/newfs_ext2fs.8 create mode 100644 sbin/newfs_ext2fs/newfs_ext2fs.c diff --git a/sbin/newfs_ext2fs/Makefile b/sbin/newfs_ext2fs/Makefile new file mode 100644 index 000000000000..d56ee5939b01 --- /dev/null +++ b/sbin/newfs_ext2fs/Makefile @@ -0,0 +1,16 @@ +# $NetBSD: Makefile,v 1.1 2007/11/17 16:50:25 tsutsui Exp $ +# @(#)Makefile 8.2 (Berkeley) 3/27/94 + +.include + +PROG= newfs_ext2fs +SRCS= newfs_ext2fs.c mke2fs.c ext2fs_bswap.c partutil.c +MAN= newfs_ext2fs.8 + +FSCK=${NETBSDSRCDIR}/sbin/fsck +CPPFLAGS+=-I${.CURDIR} -I${FSCK} +DPADD+= ${LIBUTIL} +LDADD+= -lutil +.PATH: ${NETBSDSRCDIR}/sys/ufs/ext2fs ${FSCK} + +.include diff --git a/sbin/newfs_ext2fs/extern.h b/sbin/newfs_ext2fs/extern.h new file mode 100644 index 000000000000..482be7096dbb --- /dev/null +++ b/sbin/newfs_ext2fs/extern.h @@ -0,0 +1,45 @@ +/* $NetBSD: extern.h,v 1.1 2007/11/17 16:50:25 tsutsui Exp $ */ + +/* + * Copyright (c) 1997 Christos Zoulas. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christos Zoulas. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* prototypes */ +void mke2fs(const char *, int, int); + +/* variables set up by front end. 
*/ +extern int Nflag; /* run mkfs without writing file system */ +extern int Oflag; /* ext2fs revision: 0 = E2FS_REV0, else E2FS_REV1 */ +extern int verbosity; /* amount of printf() output */ +extern int64_t fssize; /* file system size */ +extern uint sectorsize; /* sector size */ +extern uint fsize; /* fragment size */ +extern uint bsize; /* block size */ +extern uint minfree; /* free space threshold */ +extern uint num_inodes; /* number of inodes (overrides density) */ +extern char *volname; /* volume name */ diff --git a/sbin/newfs_ext2fs/mke2fs.c b/sbin/newfs_ext2fs/mke2fs.c new file mode 100644 index 000000000000..468a3994fcd2 --- /dev/null +++ b/sbin/newfs_ext2fs/mke2fs.c @@ -0,0 +1,1422 @@ +/* $NetBSD: mke2fs.c,v 1.1 2007/11/17 16:50:26 tsutsui Exp $ */ + +/* + * Copyright (c) 2007 Izumi Tsutsui. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1980, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Copyright (c) 1997 Manuel Bouyer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Manuel Bouyer. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * mke2fs.c: "re-invent (dumb but non-GPLed) wheel as a fun project" + * + * In spite of this name, there is no piece of code + * derived from GPLed e2fsprogs written for Linux. + * I referred them only to see how each structure + * member should be initialized. + * + * Reference: + * - All NetBSD sources under src/sys/ufs/ext2fs and src/sbin/fsck_ext2fs + * - Ext2fs Home Page + * http://e2fsprogs.sourceforge.net/ext2.html + * - Design and Implementation of the Second Extended Filesystem + * http://e2fsprogs.sourceforge.net/ext2intro.html + * - Linux Documentation "The Second Extended Filesystem" + * src/linux/Documentation/filesystems/ext2.txt + * in the Linux kernel distribution + */ + +#include +#ifndef lint +#if 0 +static char sccsid[] = "@(#)mkfs.c 8.11 (Berkeley) 5/3/95"; +#else +__RCSID("$NetBSD: mke2fs.c,v 1.1 2007/11/17 16:50:26 tsutsui Exp $"); +#endif +#endif /* not lint */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "extern.h" + +static void initcg(uint); +static void zap_old_sblock(daddr_t); +static uint cgoverhead(uint); +static int fsinit(const struct timeval *); +static int makedir(struct ext2fs_direct *, int); +static void copy_dir(struct ext2fs_direct *, struct ext2fs_direct *); +static void init_resizeino(const struct timeval *); +static uint32_t alloc(uint32_t, uint16_t); +static void iput(struct 
ext2fs_dinode *, ino_t); +static void rdfs(daddr_t, int, void *); +static void wtfs(daddr_t, int, void *); +static int ilog2(uint); +static int skpc(int, size_t, uint8_t *); + +/* XXX: some of these macro should be into ? */ +#define EXT2_DEF_MAX_MNT_COUNT 20 +#define EXT2_DEF_FSCKINTV (180 * 24 * 60 * 60) /* 180 days */ +#define EXT2_RESERVED_INODES (EXT2_FIRSTINO - 1) +#define EXT2_UMASK 0755 + +#define EXT2_INO_INDEX(ino) ((ino) - 1) /* no inode zero */ + +#define EXT2_LOSTFOUNDSIZE 16384 +#define EXT2_LOSTFOUNDINO EXT2_FIRSTINO /* XXX: not quite */ +#define EXT2_LOSTFOUNDUMASK 0700 + +#define NBLOCK_SUPERBLOCK 1 +#define NBLOCK_BLOCK_BITMAP 1 +#define NBLOCK_INODE_BITMAP 1 + +#define cgbase(fs, c) \ + ((fs)->e2fs.e2fs_first_dblock + (fs)->e2fs.e2fs_bpg * (c)) + + +/* + * ext2fs super block and group descriptor structures + * + * We don't have to use or setup whole in-memory m_ext2fs structure, + * but prepare it to use several macro defined in kernel headers. + */ +union { + struct m_ext2fs m_ext2fs; + char pad[SBSIZE]; +} ext2fsun; +#define sblock ext2fsun.m_ext2fs +#define gd ext2fsun.m_ext2fs.e2fs_gd + +static uint8_t *iobuf; /* for superblock and group descriptors */ +static int iobufsize; + +static uint8_t buf[MAXBSIZE]; /* for initcg() and makedir() ops */ + +static int fsi, fso; + +void +mke2fs(const char *fsys, int fi, int fo) +{ + struct timeval tv; + int64_t minfssize; + uint bcount, fbcount, ficount; + uint blocks_gd, blocks_per_cg, inodes_per_cg, iblocks_per_cg; + uint minblocks_per_cg, blocks_lastcg; + uint ncg, cylno, sboff; + uuid_t uuid; + uint32_t uustat; + int i, len, col, delta, fld_width, max_cols; + struct winsize winsize; + + gettimeofday(&tv, NULL); + fsi = fi; + fso = fo; + + /* + * collect and verify the block and fragment sizes + */ + if (!powerof2(bsize)) { + errx(EXIT_FAILURE, + "block size must be a power of 2, not %d\n", + bsize); + } + if (!powerof2(fsize)) { + errx(EXIT_FAILURE, + "fragment size must be a power of 2, not %d\n", + 
fsize); + } + if (fsize < sectorsize) { + errx(EXIT_FAILURE, + "fragment size %d is too small, minimum is %d\n", + fsize, sectorsize); + } + if (bsize < MINBSIZE) { + errx(EXIT_FAILURE, + "block size %d is too small, minimum is %d\n", + bsize, MINBSIZE); + } + if (bsize > MAXBSIZE) { + errx(EXIT_FAILURE, + "block size %d is too large, maximum is %d\n", + bsize, MAXBSIZE); + } + if (bsize != fsize) { + /* + * There is no fragment support on current ext2fs (yet?), + * but some kernel code refers fsize or fpg as bsize or bpg + * and Linux seems to set the same values to them. + */ + errx(EXIT_FAILURE, + "block size (%d) can't be diffrent from " + "fragment size (%d)\n", + bsize, fsize); + } + + sblock.e2fs.e2fs_log_bsize = ilog2(bsize) - LOG_MINBSIZE; + /* Umm, why not e2fs_log_fsize? */ + sblock.e2fs.e2fs_fsize = ilog2(fsize) - LOG_MINBSIZE; + + sblock.e2fs_bsize = bsize; + sblock.e2fs_bshift = sblock.e2fs.e2fs_log_bsize + LOG_MINBSIZE; + sblock.e2fs_qbmask = sblock.e2fs_bsize - 1; + sblock.e2fs_bmask = ~sblock.e2fs_qbmask; + sblock.e2fs_fsbtodb = ilog2(sblock.e2fs_bsize) - ilog2(sectorsize); + + /* + * Ext2fs preserves BBSIZE (1024 bytes) space at the top for + * bootloader (though it is not enough at all for our bootloader). + * If bsize == BBSIZE we have to preserve one block. + * If bsize > BBSIZE, the first block already contains BBSIZE space + * before superblock because superblock is allocated at SBOFF and + * bsize is a power of two (i.e. 2048 bytes or more). + */ + sblock.e2fs.e2fs_first_dblock = (sblock.e2fs_bsize > BBSIZE) ? 0 : 1; + minfssize = fsbtodb(&sblock, + sblock.e2fs.e2fs_first_dblock + + NBLOCK_SUPERBLOCK + + 1 /* at least one group descriptor */ + + NBLOCK_BLOCK_BITMAP + + NBLOCK_INODE_BITMAP + + 1 /* at least one inode table block */ + + 1 /* at least one data block for rootdir */ + + 1 /* at least one data block for data */ + ); /* XXX and more? 
*/ + + if (fssize < minfssize) + errx(EXIT_FAILURE, "Filesystem size %" PRId64 + " < minimum size of %" PRId64 "\n", fssize, minfssize); + + bcount = dbtofsb(&sblock, fssize); + + /* + * While many people claim that ext2fs is a (bad) clone of ufs/ffs, + * it isn't actual ffs so maybe we should call it "block group" + * as their native name rather than ffs derived "cylinder group." + * But we'll use the latter here since other kernel sources use it. + * (I also agree "cylinder" based allocation is obsolete though) + */ + + /* maybe "simple is the best" */ + blocks_per_cg = sblock.e2fs_bsize * NBBY; + + ncg = howmany(bcount - sblock.e2fs.e2fs_first_dblock, blocks_per_cg); + blocks_gd = howmany(sizeof(struct ext2_gd) * ncg, bsize); + + /* check range of inode number */ + if (num_inodes < EXT2_FIRSTINO) + num_inodes = EXT2_FIRSTINO; /* needs reserved inodes + 1 */ + if (num_inodes > UINT16_MAX * ncg) + num_inodes = UINT16_MAX * ncg; /* ext2bgd_nifree is uint16_t */ + + inodes_per_cg = num_inodes / ncg; + iblocks_per_cg = howmany(EXT2_DINODE_SIZE * inodes_per_cg, bsize); + + /* Check that the last cylinder group has enough space for inodes */ + minblocks_per_cg = + NBLOCK_BLOCK_BITMAP + + NBLOCK_INODE_BITMAP + + iblocks_per_cg + + 1; /* at least one data block */ + if (Oflag == 0 || cg_has_sb(ncg - 1) != 0) + minblocks_per_cg += NBLOCK_SUPERBLOCK + blocks_gd; + + blocks_lastcg = bcount - sblock.e2fs.e2fs_first_dblock - + blocks_per_cg * (ncg - 1); + if (blocks_lastcg < minblocks_per_cg) { + /* + * Since we make all the cylinder groups the same size, the + * last will only be small if there are more than one + * cylinder groups. If the last one is too small to store + * filesystem data, just kill it. + * + * XXX: Does fsck_ext2fs(8) properly handle this case? 
+ */ + bcount -= blocks_lastcg; + ncg--; + blocks_lastcg = blocks_per_cg; + blocks_gd = howmany(sizeof(struct ext2_gd) * ncg, bsize); + inodes_per_cg = num_inodes / ncg; + iblocks_per_cg = + howmany(EXT2_DINODE_SIZE * inodes_per_cg, bsize); + } + /* inodes_per_cg should be a multiple of 8 for bitmap ops */ + inodes_per_cg = rounddown(inodes_per_cg, NBBY); + num_inodes = inodes_per_cg * ncg; + + /* XXX: probably we should check these adjusted values again */ + + sblock.e2fs.e2fs_bcount = bcount; + sblock.e2fs.e2fs_icount = num_inodes; + + sblock.e2fs_ncg = ncg; + sblock.e2fs_ngdb = blocks_gd; + sblock.e2fs_ipb = sblock.e2fs_bsize / EXT2_DINODE_SIZE; + sblock.e2fs_itpg = iblocks_per_cg; + + sblock.e2fs.e2fs_rbcount = sblock.e2fs.e2fs_bcount * minfree / 100; + /* e2fs_fbcount will be accounted later */ + /* e2fs_ficount will be accounted later */ + + sblock.e2fs.e2fs_bpg = blocks_per_cg; + sblock.e2fs.e2fs_fpg = blocks_per_cg; + + sblock.e2fs.e2fs_ipg = inodes_per_cg; + + sblock.e2fs.e2fs_mtime = 0; + sblock.e2fs.e2fs_wtime = tv.tv_sec; + sblock.e2fs.e2fs_mnt_count = 0; + /* XXX: should add some entropy to avoid checking all fs at once? */ + sblock.e2fs.e2fs_max_mnt_count = EXT2_DEF_MAX_MNT_COUNT; + + sblock.e2fs.e2fs_magic = E2FS_MAGIC; + sblock.e2fs.e2fs_state = E2FS_ISCLEAN; + sblock.e2fs.e2fs_beh = E2FS_BEH_DEFAULT; + sblock.e2fs.e2fs_minrev = 0; + sblock.e2fs.e2fs_lastfsck = tv.tv_sec; + sblock.e2fs.e2fs_fsckintv = EXT2_DEF_FSCKINTV; + + /* + * Maybe we can use E2FS_OS_FREEBSD here and it would be more proper, + * but the purpose of this newfs_ext2fs(8) command is to provide + * a filesystem which can be recognized by firmwares on some + * Linux based appliances that can load bootstrap files only from + * (their native) ext2fs, and anyway we will (and should) try to + * act like them as much as possible. + * + * Anyway, I hope that all newer such boxes will keep their support + * for the "GOOD_OLD_REV" ext2fs. 
+ */ + sblock.e2fs.e2fs_creator = E2FS_OS_LINUX; + + if (Oflag == 0) { + sblock.e2fs.e2fs_rev = E2FS_REV0; + sblock.e2fs.e2fs_features_compat = 0; + sblock.e2fs.e2fs_features_incompat = 0; + sblock.e2fs.e2fs_features_rocompat = 0; + } else { + sblock.e2fs.e2fs_rev = E2FS_REV1; + /* + * e2fsprogs say "REV1" is "dynamic" so + * it isn't quite a version and maybe it means + * "extended from REV0 so check compat features." + * + * XXX: We don't have any native tool to activate + * the EXT2F_COMPAT_RESIZE feature and + * fsck_ext2fs(8) might not fix structures for it. + */ + sblock.e2fs.e2fs_features_compat = EXT2F_COMPAT_RESIZE; + sblock.e2fs.e2fs_features_incompat = EXT2F_INCOMPAT_FTYPE; + sblock.e2fs.e2fs_features_rocompat = + EXT2F_ROCOMPAT_SPARSESUPER | EXT2F_ROCOMPAT_LARGEFILE; + } + + sblock.e2fs.e2fs_ruid = geteuid(); + sblock.e2fs.e2fs_rgid = getegid(); + + sblock.e2fs.e2fs_first_ino = EXT2_FIRSTINO; + sblock.e2fs.e2fs_inode_size = EXT2_DINODE_SIZE; + + /* e2fs_block_group_nr is set on writing superblock to each group */ + + uuid_create(&uuid, &uustat); + if (uustat != uuid_s_ok) + errx(EXIT_FAILURE, "Failed to generate uuid\n"); + uuid_enc_le(sblock.e2fs.e2fs_uuid, &uuid); + if (volname != NULL) { + if (strlen(volname) > sizeof(sblock.e2fs.e2fs_vname)) + errx(EXIT_FAILURE, "Volume name is too long"); + strlcpy(sblock.e2fs.e2fs_vname, volname, + sizeof(sblock.e2fs.e2fs_vname)); + } + + sblock.e2fs.e2fs_fsmnt[0] = '\0'; + sblock.e2fs_fsmnt[0] = '\0'; + + sblock.e2fs.e2fs_algo = 0; /* XXX unsupported? */ + sblock.e2fs.e2fs_prealloc = 0; /* XXX unsupported? */ + sblock.e2fs.e2fs_dir_prealloc = 0; /* XXX unsupported? 
*/ + + /* calculate blocks for reserved group descriptors for resize */ + sblock.e2fs.e2fs_reserved_ngdb = 0; + if ((sblock.e2fs.e2fs_features_compat & EXT2F_COMPAT_RESIZE) != 0) { + uint64_t target_blocks; + uint target_ncg, target_ngdb, reserved_ngdb; + + /* reserve descriptors for size as 1024 times as current */ + target_blocks = + (sblock.e2fs.e2fs_bcount - sblock.e2fs.e2fs_first_dblock) + * 1024ULL; + /* number of blocks must be in uint32_t */ + if (target_blocks > UINT32_MAX) + target_blocks = UINT32_MAX; + target_ncg = + howmany(target_blocks, sblock.e2fs.e2fs_bpg); + target_ngdb = howmany(sizeof(struct ext2_gd) * target_ncg, + sblock.e2fs_bsize); + /* + * Reserved group descriptor blocks are preserved as + * the second level double indirect reference blocks in + * the EXT2_RESIZEINO inode, so the maximum number of + * the blocks is NINDIR(fs). + * (see also descriptions in init_resizeino() function) + * + * We check a number including current e2fs_ngdb here + * because they will be moved into reserved gdb on + * possible future size shrink, though e2fsprogs don't + * seem to care about it. 
+ */ + if (target_ngdb > NINDIR(&sblock)) + target_ngdb = NINDIR(&sblock); + + reserved_ngdb = target_ngdb - sblock.e2fs_ngdb; + + /* make sure reserved_ngdb fits in the last cg */ + if (reserved_ngdb >= blocks_lastcg - cgoverhead(ncg - 1)) + reserved_ngdb = blocks_lastcg - cgoverhead(ncg - 1); + if (reserved_ngdb == 0) { + /* if no space for reserved gdb, disable the feature */ + sblock.e2fs.e2fs_features_compat &= + ~EXT2F_COMPAT_RESIZE; + } + sblock.e2fs.e2fs_reserved_ngdb = reserved_ngdb; + } + + /* + * Initialize group descriptors + */ + gd = malloc(sblock.e2fs_ngdb * bsize); + if (gd == NULL) + errx(EXIT_FAILURE, "Can't allocate descriptors buffer"); + memset(gd, 0, sblock.e2fs_ngdb * bsize); + + fbcount = 0; + ficount = 0; + for (cylno = 0; cylno < ncg; cylno++) { + uint boffset; + + boffset = cgbase(&sblock, cylno); + if (sblock.e2fs.e2fs_rev == E2FS_REV0 || + (sblock.e2fs.e2fs_features_rocompat & + EXT2F_ROCOMPAT_SPARSESUPER) == 0 || + cg_has_sb(cylno)) { + boffset += NBLOCK_SUPERBLOCK + sblock.e2fs_ngdb; + if (sblock.e2fs.e2fs_rev > E2FS_REV0 && + (sblock.e2fs.e2fs_features_compat & + EXT2F_COMPAT_RESIZE) != 0) + boffset += sblock.e2fs.e2fs_reserved_ngdb; + } + gd[cylno].ext2bgd_b_bitmap = boffset; + boffset += NBLOCK_BLOCK_BITMAP; + gd[cylno].ext2bgd_i_bitmap = boffset; + boffset += NBLOCK_INODE_BITMAP; + gd[cylno].ext2bgd_i_tables = boffset; + if (cylno == (ncg - 1)) + gd[cylno].ext2bgd_nbfree = + blocks_lastcg - cgoverhead(cylno); + else + gd[cylno].ext2bgd_nbfree = + sblock.e2fs.e2fs_bpg - cgoverhead(cylno); + fbcount += gd[cylno].ext2bgd_nbfree; + gd[cylno].ext2bgd_nifree = sblock.e2fs.e2fs_ipg; + if (cylno == 0) { + /* take reserved inodes off nifree */ + gd[cylno].ext2bgd_nifree -= EXT2_RESERVED_INODES; + } + ficount += gd[cylno].ext2bgd_nifree; + gd[cylno].ext2bgd_ndirs = 0; + } + sblock.e2fs.e2fs_fbcount = fbcount; + sblock.e2fs.e2fs_ficount = ficount; + + /* + * Dump out summary information about file system. 
+ */ + if (verbosity > 0) { + printf("%s: %u.%1uMB (%" PRId64 " sectors) " + "block size %u, fragment size %u\n", + fsys, + (uint)(((uint64_t)bcount * bsize) / (1024 * 1024)), + (uint)((uint64_t)bcount * bsize - + rounddown((uint64_t)bcount * bsize, 1024 * 1024)) + / 1024 / 100, + fssize, bsize, fsize); + printf("\tusing %u block groups of %u.0MB, %u blks, " + "%u inodes.\n", + ncg, bsize * sblock.e2fs.e2fs_bpg / (1024 * 1024), + sblock.e2fs.e2fs_bpg, sblock.e2fs.e2fs_ipg); + } + + /* + * allocate space for superblock and group descriptors (XXX: mmap(2) reports failure as MAP_FAILED, not NULL) + */ + iobufsize = (NBLOCK_SUPERBLOCK + sblock.e2fs_ngdb) * sblock.e2fs_bsize; + iobuf = mmap(0, iobufsize, PROT_READ|PROT_WRITE, + MAP_ANON|MAP_PRIVATE, -1, 0); + if (iobuf == NULL) + errx(EXIT_FAILURE, "Cannot allocate I/O buffer\n"); + memset(iobuf, 0, iobufsize); + + /* + * We now start writing to the filesystem + */ + + if (!Nflag) { + static const uint pbsize[] = { 1024, 2048, 8192, 16384, 0 }; + uint pblock, epblock; + /* + * Validate the given file system size. + * Verify that its last block can actually be accessed. + * Convert to file system fragment sized units. + */ + if (fssize <= 0) + errx(EXIT_FAILURE, "Preposterous size %" PRId64 "\n", + fssize); + wtfs(fssize - 1, sectorsize, iobuf); + + /* + * Ensure there is nothing that looks like a filesystem + * superblock anywhere other than where ours will be. + * If fsck_ext2fs finds the wrong one all hell breaks loose! + * + * XXX: needs to check how fsck_ext2fs programs even + * on other OSes determine alternate superblocks + */ + for (i = 0; pbsize[i] != 0; i++) { + epblock = bcount * bsize / pbsize[i]; + for (pblock = ((pbsize[i] == SBSIZE) ? 1 : 0); + pblock < epblock; + pblock += pbsize[i] * NBBY /* bpg */) + zap_old_sblock((daddr_t)pblock * + pbsize[i] / sectorsize); + } + } + + if (verbosity >= 3) + printf("super-block backups (for fsck_ext2fs -b #) at:\n"); + /* If we are printing more than one line of numbers, line up columns */ + fld_width = verbosity < 4 ? 
1 : snprintf(NULL, 0, "%" PRIu64, + (uint64_t)cgbase(&sblock, ncg - 1)); + /* Get terminal width */ + if (ioctl(fileno(stdout), TIOCGWINSZ, &winsize) == 0) + max_cols = winsize.ws_col; + else + max_cols = 80; + if (Nflag && verbosity == 3) + /* Leave space to add " ..." after one row of numbers */ + max_cols -= 4; +#define BASE 0x10000 /* For some fixed-point maths */ + col = 0; + delta = verbosity > 2 ? 0 : max_cols * BASE / ncg; + for (cylno = 0; cylno < ncg; cylno++) { + fflush(stdout); + initcg(cylno); + if (verbosity < 2) + continue; + /* the first one is a master, not backup */ + if (cylno == 0) + continue; + /* skip if this cylinder doesn't have a backup */ + if (sblock.e2fs.e2fs_rev > E2FS_REV0 && + (sblock.e2fs.e2fs_features_rocompat & + EXT2F_ROCOMPAT_SPARSESUPER) != 0 && + cg_has_sb(cylno) == 0) + continue; + + if (delta > 0) { + if (Nflag) + /* No point doing dots for -N */ + break; + /* Print dots scaled to end near RH margin */ + for (col += delta; col > BASE; col -= BASE) + printf("."); + continue; + } + /* Print superblock numbers */ + len = printf(" %*" PRIu64 "," + !col, fld_width, + (uint64_t)cgbase(&sblock, cylno)); + col += len; + if (col + len < max_cols) + /* Next number fits */ + continue; + /* Next number won't fit, need a newline */ + if (verbosity <= 3) { + /* Print dots for subsequent cylinder groups */ + delta = sblock.e2fs_ncg - cylno - 1; + if (delta != 0) { + if (Nflag) { + printf(" ..."); + break; + } + delta = max_cols * BASE / delta; + } + } + col = 0; + printf("\n"); + } +#undef BASE + if (col > 0) + printf("\n"); + if (Nflag) + return; + + /* + * Now construct the initial file system, + */ + if (fsinit(&tv) == 0) + errx(EXIT_FAILURE, "Error making filesystem"); + /* + * Write out the superblock and group descriptors + */ + sblock.e2fs.e2fs_block_group_nr = 0; + sboff = 0; + if (cgbase(&sblock, 0) == 0) { + /* + * If the first block contains the boot block sectors, + * (i.e. 
in case of sblock.e2fs.e2fs_bsize > BBSIZE) + * we have to preserve data in it. + */ + sboff = SBOFF; + } + e2fs_sbsave(&sblock.e2fs, (struct ext2fs *)(iobuf + sboff)); + e2fs_cgsave(gd, (struct ext2_gd *)(iobuf + sblock.e2fs_bsize), + sizeof(struct ext2_gd) * sblock.e2fs_ncg); + wtfs(fsbtodb(&sblock, cgbase(&sblock, 0)) + sboff / sectorsize, + iobufsize - sboff, iobuf + sboff); + + munmap(iobuf, iobufsize); +} + +/* + * Initialize a cylinder (block) group. + */ +void +initcg(uint cylno) +{ + uint nblcg, i, sboff; + + /* + * Make a copy of the superblock and group descriptors. + */ + if (sblock.e2fs.e2fs_rev == E2FS_REV0 || + (sblock.e2fs.e2fs_features_rocompat & + EXT2F_ROCOMPAT_SPARSESUPER) == 0 || + cg_has_sb(cylno)) { + sblock.e2fs.e2fs_block_group_nr = cylno; + sboff = 0; + if (cgbase(&sblock, cylno) == 0) { + /* preserve data in bootblock in cg0 */ + sboff = SBOFF; + } + e2fs_sbsave(&sblock.e2fs, (struct ext2fs *)(iobuf + sboff)); + e2fs_cgsave(gd, (struct ext2_gd *)(iobuf + + sblock.e2fs_bsize * NBLOCK_SUPERBLOCK), + sizeof(struct ext2_gd) * sblock.e2fs_ncg); + /* write superblock and group descriptor backups */ + wtfs(fsbtodb(&sblock, cgbase(&sblock, cylno)) + + sboff / sectorsize, iobufsize - sboff, iobuf + sboff); + } + + /* + * Initialize block bitmap. + */ + memset(buf, 0, sblock.e2fs_bsize); + if (cylno == (sblock.e2fs_ncg - 1)) { + /* The last group could have less blocks than e2fs_bpg. */ + nblcg = sblock.e2fs.e2fs_bcount - + cgbase(&sblock, sblock.e2fs_ncg - 1); + for (i = nblcg; i < roundup(nblcg, NBBY); i++) + setbit(buf, i); + memset(&buf[i / NBBY], ~0U, sblock.e2fs.e2fs_bpg - i); + } + /* set overhead (superblock, group descriptor etc.) blocks used */ + for (i = 0; i < cgoverhead(cylno) / NBBY; i++) + buf[i] = ~0; + i = i * NBBY; + for (; i < cgoverhead(cylno); i++) + setbit(buf, i); + wtfs(fsbtodb(&sblock, gd[cylno].ext2bgd_b_bitmap), sblock.e2fs_bsize, + buf); + + /* + * Initialize inode bitmap. 
+ * + * Assume e2fs_ipg is a multiple of NBBY (as we did above). + * Note even (possibly smaller) the last group has the same e2fs_ipg. + */ + i = sblock.e2fs.e2fs_ipg / NBBY; + memset(buf, 0, i); + memset(buf + i, ~0U, sblock.e2fs_bsize - i); + if (cylno == 0) { + /* mark reserved inodes */ + for (i = 1; i < EXT2_FIRSTINO; i++) + setbit(buf, EXT2_INO_INDEX(i)); + } + wtfs(fsbtodb(&sblock, gd[cylno].ext2bgd_i_bitmap), sblock.e2fs_bsize, + buf); + + /* + * Initialize inode tables. + * + * Just zero out entries (no magic there). + */ + memset(buf, 0, sblock.e2fs_bsize); + for (i = 0; i < sblock.e2fs_itpg; i++) { + wtfs(fsbtodb(&sblock, gd[cylno].ext2bgd_i_tables + i), + sblock.e2fs_bsize, buf); + } +} + +/* + * Zap possible lingering old superblock data + */ +static void +zap_old_sblock(daddr_t sec) +{ + static daddr_t cg0_data; + uint32_t oldfs[SBSIZE / sizeof(uint32_t)]; + static const struct fsm { + uint32_t offset; + uint32_t magic; + uint32_t mask; + } fs_magics[] = { + {offsetof(struct ext2fs, e2fs_magic) / 4, E2FS_MAGIC, 0xffff}, + {offsetof(struct ext2fs, e2fs_magic) / 4, + E2FS_MAGIC << 16, 0xffff0000}, + {14, 0xef530000, 0xffff0000}, /* EXT2FS (big) */ + {0x55c / 4, 0x00011954, ~0U}, /* FS_UFS1_MAGIC */ + {0x55c / 4, 0x19540119, ~0U}, /* FS_UFS2_MAGIC */ + {0, 0x70162, ~0U}, /* LFS_MAGIC */ + {.offset = ~0U}, + }; + const struct fsm *fsm; + + if (Nflag) + return; + + /* don't override data before superblock */ + if (sec < SBOFF / sectorsize) + return; + + if (cg0_data == 0) { + cg0_data = + ((daddr_t)sblock.e2fs.e2fs_first_dblock + cgoverhead(0)) * + sblock.e2fs_bsize / sectorsize; + } + + /* Ignore anything that is beyond our filesystem */ + if (sec >= fssize) + return; + /* Zero anything inside our filesystem... 
*/ + if (sec >= sblock.e2fs.e2fs_first_dblock * bsize / sectorsize) { + /* ...unless we will write that area anyway */ + if (sec >= cg0_data) + /* assume iobuf is zero'ed here */ + wtfs(sec, roundup(SBSIZE, sectorsize), iobuf); + return; + } + + /* + * The sector might contain boot code, so we must validate it + * + * XXX: ext2fs won't preserve data after SBOFF, + * but first_dblock could have a different value. + */ + rdfs(sec, sizeof(oldfs), &oldfs); + for (fsm = fs_magics;; fsm++) { + uint32_t v; + if (fsm->mask == 0) + return; + v = oldfs[fsm->offset]; + if ((v & fsm->mask) == fsm->magic || + (bswap32(v) & fsm->mask) == fsm->magic) + break; + } + + /* Just zap the magic number */ + oldfs[fsm->offset] = 0; + wtfs(sec, sizeof(oldfs), &oldfs); +} + +/* + * uint cgoverhead(uint c) + * + * Return a number of reserved blocks on the specified group. + * XXX: should be shared with src/sbin/newfs_ext2fs/setup.c + */ +uint +cgoverhead(uint c) +{ + uint overh; + + overh = NBLOCK_BLOCK_BITMAP + NBLOCK_INODE_BITMAP + sblock.e2fs_itpg; + + if (sblock.e2fs.e2fs_rev == E2FS_REV0 || + (sblock.e2fs.e2fs_features_rocompat & + EXT2F_ROCOMPAT_SPARSESUPER) == 0 || + cg_has_sb(c) != 0) { + overh += NBLOCK_SUPERBLOCK + sblock.e2fs_ngdb; + + if (sblock.e2fs.e2fs_rev > E2FS_REV0 && + (sblock.e2fs.e2fs_features_compat & + EXT2F_COMPAT_RESIZE) != 0) + overh += sblock.e2fs.e2fs_reserved_ngdb; + } + + return overh; +} + +/* + * Initialize the file system + */ + +#define LOSTDIR /* e2fsck complains if there is no lost+found */ + +#define PREDEFDIR 2 + +#ifdef LOSTDIR +#define PREDEFROOTDIR (PREDEFDIR + 1) +#else +#define PREDEFROOTDIR PREDEFDIR +#endif + +struct ext2fs_direct root_dir[] = { + { EXT2_ROOTINO, 0, 1, 0, "." }, + { EXT2_ROOTINO, 0, 2, 0, ".." }, +#ifdef LOSTDIR + { EXT2_LOSTFOUNDINO, 0, 10, 0, "lost+found" }, +#endif +}; + +#ifdef LOSTDIR +struct ext2fs_direct lost_found_dir[] = { + { EXT2_LOSTFOUNDINO, 0, 1, 0, "." }, + { EXT2_ROOTINO, 0, 2, 0, ".." 
/*
 * int fsinit(const struct timeval *tv)
 *
 *	Create the initial inodes and directory blocks of the new file
 *	system: the resize inode (only for REV1 with the resize feature),
 *	the lost+found directory (only when LOSTDIR is defined) and the
 *	root directory.  tv->tv_sec is stored as atime/ctime/mtime of
 *	every inode created here.
 *
 *	Returns 1 on success, 0 if alloc() could not provide a block
 *	(a message is printed in that case).
 */
int
fsinit(const struct timeval *tv)
{
	struct ext2fs_dinode node;
#ifdef LOSTDIR
	uint i, nblks_lostfound, blk;
#endif

	/*
	 * Initialize the inode for the resizefs feature
	 */
	if (sblock.e2fs.e2fs_rev > E2FS_REV0 &&
	    (sblock.e2fs.e2fs_features_compat & EXT2F_COMPAT_RESIZE) != 0)
		init_resizeino(tv);

	/*
	 * Initialize the node
	 */

#ifdef LOSTDIR
	/*
	 * Create the lost+found directory
	 */
	if (sblock.e2fs.e2fs_rev > E2FS_REV0 &&
	    sblock.e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE) {
		/* file types are recorded in directory entries */
		lost_found_dir[0].e2d_type = EXT2_FT_DIR;
		lost_found_dir[1].e2d_type = EXT2_FT_DIR;
	}
	/* builds the "." / ".." block in the global buf; written below */
	(void)makedir(lost_found_dir, __arraycount(lost_found_dir));

	/* prepare a bit large directory for preserved files */
	nblks_lostfound = EXT2_LOSTFOUNDSIZE / sblock.e2fs_bsize;
	/* ...but only with direct blocks */
	if (nblks_lostfound > NDADDR)
		nblks_lostfound = NDADDR;

	memset(&node, 0, sizeof(node));
	node.e2di_mode = EXT2_IFDIR | EXT2_LOSTFOUNDUMASK;
	node.e2di_uid = geteuid();
	node.e2di_size = sblock.e2fs_bsize * nblks_lostfound;
	node.e2di_atime = tv->tv_sec;
	node.e2di_ctime = tv->tv_sec;
	node.e2di_mtime = tv->tv_sec;
	node.e2di_gid = getegid();
	node.e2di_nlink = PREDEFDIR;
	/* e2di_nblock is a number of disk block, not ext2fs block */
	node.e2di_nblock = fsbtodb(&sblock, nblks_lostfound);
	/* only this first alloc() passes the directory mode */
	node.e2di_blocks[0] = alloc(sblock.e2fs_bsize, node.e2di_mode);
	if (node.e2di_blocks[0] == 0) {
		printf("%s: can't allocate block for lost+found\n", __func__);
		return 0;
	}
	for (i = 1; i < nblks_lostfound; i++) {
		blk = alloc(sblock.e2fs_bsize, 0);
		if (blk == 0) {
			printf("%s: can't allocate blocks for lost+found\n",
			    __func__);
			return 0;
		}
		node.e2di_blocks[i] = blk;
	}
	node.e2di_gen = tv->tv_sec;
	/* buf still holds the block prepared by makedir() above */
	wtfs(fsbtodb(&sblock, node.e2di_blocks[0]), sblock.e2fs_bsize, buf);
	/* remaining blocks each hold one empty entry spanning the block */
	pad_dir.e2d_reclen = sblock.e2fs_bsize;
	for (i = 1; i < nblks_lostfound; i++) {
		memset(buf, 0, sblock.e2fs_bsize);
		copy_dir(&pad_dir, (struct ext2fs_direct *)buf);
		wtfs(fsbtodb(&sblock, node.e2di_blocks[i]), sblock.e2fs_bsize,
		    buf);
	}
	iput(&node, EXT2_LOSTFOUNDINO);
#endif
	/*
	 * create the root directory
	 */
	memset(&node, 0, sizeof(node));
	if (sblock.e2fs.e2fs_rev > E2FS_REV0 &&
	    sblock.e2fs.e2fs_features_incompat & EXT2F_INCOMPAT_FTYPE) {
		root_dir[0].e2d_type = EXT2_FT_DIR;
		root_dir[1].e2d_type = EXT2_FT_DIR;
#ifdef LOSTDIR
		root_dir[2].e2d_type = EXT2_FT_DIR;
#endif
	}
	node.e2di_mode = EXT2_IFDIR | EXT2_UMASK;
	node.e2di_uid = geteuid();
	/* makedir() fills buf and returns the directory size */
	node.e2di_size = makedir(root_dir, __arraycount(root_dir));
	node.e2di_atime = tv->tv_sec;
	node.e2di_ctime = tv->tv_sec;
	node.e2di_mtime = tv->tv_sec;
	node.e2di_gid = getegid();
	node.e2di_nlink = PREDEFROOTDIR;
	/* e2di_nblock is a number of disk block, not ext2fs block */
	node.e2di_nblock = fsbtodb(&sblock, 1);
	node.e2di_blocks[0] = alloc(node.e2di_size, node.e2di_mode);
	if (node.e2di_blocks[0] == 0) {
		printf("%s: can't allocate block for root dir\n", __func__);
		return 0;
	}
	wtfs(fsbtodb(&sblock, node.e2di_blocks[0]), sblock.e2fs_bsize, buf);
	iput(&node, EXT2_ROOTINO);
	return 1;
}
+ */ +int +makedir(struct ext2fs_direct *protodir, int entries) +{ + uint8_t *cp; + uint i, spcleft; + uint dirblksiz; + + dirblksiz = sblock.e2fs_bsize; + memset(buf, 0, dirblksiz); + spcleft = dirblksiz; + for (cp = buf, i = 0; i < entries - 1; i++) { + protodir[i].e2d_reclen = EXT2FS_DIRSIZ(protodir[i].e2d_namlen); + copy_dir(&protodir[i], (struct ext2fs_direct *)cp); + cp += protodir[i].e2d_reclen; + spcleft -= protodir[i].e2d_reclen; + } + protodir[i].e2d_reclen = spcleft; + copy_dir(&protodir[i], (struct ext2fs_direct *)cp); + return dirblksiz; +} + +/* + * Copy a direntry to a buffer, in fs byte order + */ +static void +copy_dir(struct ext2fs_direct *dir, struct ext2fs_direct *dbuf) +{ + + memcpy(dbuf, dir, EXT2FS_DIRSIZ(dir->e2d_namlen)); + dbuf->e2d_ino = h2fs32(dir->e2d_ino); + dbuf->e2d_reclen = h2fs16(dir->e2d_reclen); +} + +/* + * void init_resizeino(const struct timeval *tv); + * + * Initialize the EXT2_RESEIZE_INO inode to prereserve + * reserved group descriptor blocks for future growth of this ext2fs. + */ +void +init_resizeino(const struct timeval *tv) +{ + struct ext2fs_dinode node; + uint64_t isize; + uint32_t *dindir_block, *reserved_gdb; + uint nblock, i, cylno, n; + + memset(&node, 0, sizeof(node)); + + /* + * Note this function only prepares required structures for + * future resize. It's a quite different work to implement + * a utility like resize_ext2fs(8) which handles actual + * resize ops even on offline. + * + * Anyway, I'm not sure if there is any documentation about + * this resize ext2fs feature and related data structures, + * and I've written this function based on things what I see + * on some existing implementation and real file system data + * created by existing tools. To be honest, they are not + * so easy to read, so I will try to implement it here without + * any dumb optimization for people who would eventually + * work on "yet another wheel" like resize_ext2fs(8). 
+ */ + + /* + * I'm not sure what type is appropriate for this inode. + * The release notes of e2fsprogs say they changed e2fsck to allow + * IFREG for RESIZEINO since a certain resize tool used it. Hmm. + */ + node.e2di_mode = EXT2_IFREG | 0600; + node.e2di_uid = geteuid(); + node.e2di_atime = tv->tv_sec; + node.e2di_ctime = tv->tv_sec; + node.e2di_mtime = tv->tv_sec; + node.e2di_gid = getegid(); + node.e2di_nlink = 1; + + /* + * To preserve the reserved group descriptor blocks, + * EXT2_RESIZEINO uses only double indirect reference + * blocks in its inode entries. + * + * All entries for direct, single indirect and triple + * indirect references are left zero'ed. Maybe it's safe + * because no write operation will happen with this inode. + * + * We have to allocate a block for the first level double + * indirect reference block. Indexes of inode entries in + * this first level dindirect block are corresponding to + * indexes of group descriptors including both used (e2fs_ngdb) + * and reserved (e2fs_reserved_ngdb) group descriptor blocks. + * + * Inode entries of indexes for used (e2fs_ngdb) descriptors are + * left zero'ed. Entries for reserved (e2fs_reserved_ngdb) ones + * have block numbers of actual reserved group descriptors + * allocated at block group zero. This means e2fs_reserved_ngdb + * blocks are reserved as the second level dindirect reference + * blocks, and they actually contain block numbers of indirect + * references. It may be safe since they don't have to keep any + * data yet. + * + * Each of these second dindirect blocks (i.e. reserved group + * descriptor blocks in the first block group) should have + * block numbers of its backups in all other block groups. + * I.e. reserved_ngdb[0] block in block group 0 contains block + * numbers of reserved_ngdb[0] from group 1 through (e2fs_ncg - 1). + * The number of backups can be determined by the + * EXT2_ROCOMPAT_SPARSESUPER feature and cg_has_sb() macro + * as done in the above initcg() function. 
+ */ + + /* set e2di_size which occupies whole blocks through DINDIR blocks */ + isize = (uint64_t)sblock.e2fs_bsize * NDADDR + + (uint64_t)sblock.e2fs_bsize * NINDIR(&sblock) + + (uint64_t)sblock.e2fs_bsize * NINDIR(&sblock) * NINDIR(&sblock); + if (isize > UINT32_MAX && + (sblock.e2fs.e2fs_features_rocompat & + EXT2F_ROCOMPAT_LARGEFILE) == 0) { + /* XXX should enable it here and update all backups? */ + errx(EXIT_FAILURE, "%s: large_file rocompat feature is " + "required to enable resize feature for this filesystem\n", + __func__); + } + /* upper 32bit is stored into e2di_dacl on REV1 feature */ + node.e2di_size = isize & UINT32_MAX; + node.e2di_dacl = isize >> 32; + +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ + + /* zero out entries for direct references */ + for (i = 0; i < NDADDR; i++) + node.e2di_blocks[i] = 0; + /* also zero out entries for single and triple indirect references */ + node.e2di_blocks[NDADDR + SINGLE] = 0; + node.e2di_blocks[NDADDR + TRIPLE] = 0; + + /* allocate a block for the first level double indirect reference */ + node.e2di_blocks[NDADDR + DOUBLE] = + alloc(sblock.e2fs_bsize, node.e2di_mode); + if (node.e2di_blocks[NDADDR + DOUBLE] == 0) + errx(EXIT_FAILURE, "%s: Can't allocate a dindirect block", + __func__); + + /* account this first block */ + nblock = fsbtodb(&sblock, 1); + + /* allocate buffer to set data in the dindirect block */ + dindir_block = malloc(sblock.e2fs_bsize); + if (dindir_block == NULL) + errx(EXIT_FAILURE, + "%s: Can't allocate buffer for a dindirect block", + __func__); + + /* allocate buffer to set data in the group descriptor blocks */ + reserved_gdb = malloc(sblock.e2fs_bsize); + if (reserved_gdb == NULL) + errx(EXIT_FAILURE, + "%s: Can't allocate buffer for group descriptor blocks", + __func__); + + /* + * Setup block entries in the first level dindirect blocks + */ + for (i = 0; i < 
sblock.e2fs_ngdb; i++) { + /* no need to handle used group descriptor blocks */ + dindir_block[i] = 0; + } + for (; i < sblock.e2fs_ngdb + sblock.e2fs.e2fs_reserved_ngdb; i++) { + /* + * point reserved group descriptor block in the first + * (i.e. master) block group + * + * XXX: e2fsprogs seem to use "(i % NINDIR(&sblock))" here + * to store maximum NINDIR(&sblock) reserved gdbs. + * I'm not sure what will be done on future filesystem + * shrink in that case on their way. + */ + if (i >= NINDIR(&sblock)) + errx(EXIT_FAILURE, "%s: too many reserved " + "group descriptors (%u) for resize inode", + __func__, sblock.e2fs.e2fs_reserved_ngdb); + dindir_block[i] = + h2fs32(cgbase(&sblock, 0) + NBLOCK_SUPERBLOCK + i); + + /* + * Setup block entries in the second dindirect blocks + * (which are primary reserved group descriptor blocks) + * to point their backups. + */ + for (n = 0, cylno = 1; cylno < sblock.e2fs_ncg; cylno++) { + /* skip block groups without backup */ + if ((sblock.e2fs.e2fs_features_rocompat & + EXT2F_ROCOMPAT_SPARSESUPER) != 0 && + cg_has_sb(cylno) == 0) + continue; + + if (n >= NINDIR(&sblock)) + errx(EXIT_FAILURE, "%s: too many block groups " + "for the resize feature", __func__); + /* + * These blocks are already reserved in + * initcg() so no need to use alloc() here. 
+ */ + reserved_gdb[n++] = h2fs32(cgbase(&sblock, cylno) + + NBLOCK_SUPERBLOCK + i); + nblock += fsbtodb(&sblock, 1); + } + for (; n < NINDIR(&sblock); n++) + reserved_gdb[n] = 0; + + /* write group descriptor block as the second dindirect refs */ + wtfs(fsbtodb(&sblock, fs2h32(dindir_block[i])), + sblock.e2fs_bsize, reserved_gdb); + nblock += fsbtodb(&sblock, 1); + } + for (; i < NINDIR(&sblock); i++) { + /* leave trailing entries unallocated */ + dindir_block[i] = 0; + } + free(reserved_gdb); + + /* finally write the first level dindirect block */ + wtfs(fsbtodb(&sblock, node.e2di_blocks[NDADDR + DOUBLE]), + sblock.e2fs_bsize, dindir_block); + free(dindir_block); + + node.e2di_nblock = nblock; + iput(&node, EXT2_RESIZEINO); +} + +/* + * uint32_t alloc(uint32_t size, uint16_t mode) + * + * Allocate a block (from cylinder group 0) + * Reference: src/sys/ufs/ext2fs/ext2fs_alloc.c:ext2fs_alloccg() + */ +uint32_t +alloc(uint32_t size, uint16_t mode) +{ + uint32_t loc, bno; + uint8_t *bbp; + uint len, map, i; + + if (gd[0].ext2bgd_nbfree == 0) + return 0; + + if (size > sblock.e2fs_bsize) + return 0; + + bbp = malloc(sblock.e2fs_bsize); + if (bbp == NULL) + return 0; + rdfs(fsbtodb(&sblock, gd[0].ext2bgd_b_bitmap), sblock.e2fs_bsize, bbp); + + /* XXX: kernel uses e2fs_fpg here */ + len = sblock.e2fs.e2fs_bpg / NBBY; + +#if 0 /* no need block allocation for root or lost+found dir */ + for (loc = 0; loc < len; loc++) { + if (bbp[loc] == 0) { + bno = loc * NBBY; + goto gotit; + } + } +#endif + + loc = skpc(~0U, len, bbp); + if (loc == 0) + return 0; + loc = len - loc; + map = bbp[loc]; + bno = loc * NBBY; + for (i = 0; i < NBBY; i++, bno++) { + if ((map & (1 << i)) == 0) + goto gotit; + } + return 0; + + gotit: + if (isset(bbp, bno)) + errx(EXIT_FAILURE, "%s: inconsistent bitmap\n", __func__); + + setbit(bbp, bno); + wtfs(fsbtodb(&sblock, gd[0].ext2bgd_b_bitmap), sblock.e2fs_bsize, bbp); + free(bbp); + /* XXX: modified group descriptors won't be written into backups */ + 
gd[0].ext2bgd_nbfree--; + if ((mode & EXT2_IFDIR) != 0) + gd[0].ext2bgd_ndirs++; + sblock.e2fs.e2fs_fbcount--; + + return sblock.e2fs.e2fs_first_dblock + bno; +} + +/* + * void iput(struct ext2fs_dinode *ip, ino_t ino) + * + * Put an inode entry into the corresponding table. + */ +static void +iput(struct ext2fs_dinode *ip, ino_t ino) +{ + daddr_t d; + uint c, i; + struct ext2fs_dinode *dp; + uint8_t *bp; + + bp = malloc(sblock.e2fs_bsize); + if (bp == NULL) + errx(EXIT_FAILURE, "%s: can't allocate buffer for inode\n", + __func__); + + /* + * Reserved inodes are allocated and accounted in initcg() + * so skip checks of the bitmap and allocation for them. + */ + if (ino >= EXT2_FIRSTINO) { + c = ino_to_cg(&sblock, ino); + + /* sanity check */ + if (gd[c].ext2bgd_nifree == 0) + errx(EXIT_FAILURE, + "%s: no free inode %" PRId64 " in block group %u\n", + __func__, ino, c); + + /* update inode bitmap */ + rdfs(fsbtodb(&sblock, gd[0].ext2bgd_i_bitmap), + sblock.e2fs_bsize, bp); + + /* more sanity */ + if (isset(bp, EXT2_INO_INDEX(ino))) + errx(EXIT_FAILURE, "%s: inode %u already in use\n", + __func__, (uint)ino); + setbit(bp, EXT2_INO_INDEX(ino)); + wtfs(fsbtodb(&sblock, gd[0].ext2bgd_i_bitmap), + sblock.e2fs_bsize, bp); + gd[c].ext2bgd_nifree--; + sblock.e2fs.e2fs_ficount--; + } + + if (ino >= sblock.e2fs.e2fs_ipg * sblock.e2fs_ncg) + errx(EXIT_FAILURE, "%s: inode value out of range (%llu).\n", + __func__, (unsigned long long)ino); + + /* update an inode entry in the table */ + d = fsbtodb(&sblock, ino_to_fsba(&sblock, ino)); + rdfs(d, sblock.e2fs_bsize, bp); + + dp = (struct ext2fs_dinode *)bp; + dp += ino_to_fsbo(&sblock, ino); + e2fs_isave(ip, dp); + /* e2fs_i_bswap() doesn't swap e2di_blocks addrs */ + if ((ip->e2di_mode & EXT2_IFMT) != EXT2_IFLNK) { + for (i = 0; i < NDADDR + NIADDR; i++) + dp->e2di_blocks[i] = h2fs32(ip->e2di_blocks[i]); + } + + wtfs(d, sblock.e2fs_bsize, bp); + free(bp); +} + +/* + * Read a block from the file system + */ +void +rdfs(daddr_t bno, 
int size, void *bf) +{ + int n; + off_t offset; + + offset = bno; + n = pread(fsi, bf, size, offset * sectorsize); + if (n != size) + errx(EXIT_FAILURE, "%s: read error for sector %lld: %s\n", + __func__, (long long)bno, strerror(errno)); +} + +/* + * Write a block to the file system + */ +void +wtfs(daddr_t bno, int size, void *bf) +{ + int n; + off_t offset; + + if (Nflag) + return; + offset = bno; + n = pwrite(fso, bf, size, offset * sectorsize); + if (n != size) + errx(EXIT_FAILURE, "%s: write error for sector %lld: %s\n", + __func__, (long long)bno, strerror(errno)); +} + +int +ilog2(uint val) +{ + + if (val == 0 || !powerof2(val)) + errx(EXIT_FAILURE, "%s: %d is not a power of 2\n", + __func__, val); + + return ffs(val) - 1; +} + +/* + * int skpc(int mask, size_t size, uint8_t *cp) + * + * Locate an unsigned character of value mask inside cp[]. + * (from src/sys/lib/libkern/skpc.c) + */ +int +skpc(int mask, size_t size, uint8_t *cp) +{ + uint8_t *end; + + end = &cp[size]; + while (cp < end && *cp == (uint8_t)mask) + cp++; + + return end - cp; +} diff --git a/sbin/newfs_ext2fs/newfs_ext2fs.8 b/sbin/newfs_ext2fs/newfs_ext2fs.8 new file mode 100644 index 000000000000..39a4d327d17f --- /dev/null +++ b/sbin/newfs_ext2fs/newfs_ext2fs.8 @@ -0,0 +1,321 @@ +.\" $NetBSD: newfs_ext2fs.8,v 1.1 2007/11/17 16:50:26 tsutsui Exp $ +.\" +.\" Copyright (c) 1983, 1987, 1991, 1993, 1994 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)newfs.8 8.6 (Berkeley) 5/3/95 +.\" +.Dd November 17, 2007 +.Dt NEWFS_EXT2FS 8 +.Os +.Sh NAME +.Nm newfs_ext2fs +.Nd construct a new Ext2 file system +.Sh SYNOPSIS +.Nm +.Op Fl FINZ +.Op Fl b Ar block-size +.Op Fl f Ar frag-size +.Op Fl i Ar bytes-per-inode +.Op Fl m Ar free-space +.Op Fl n Ar inodes +.Op Fl O Ar filesystem-format +.Op Fl S Ar sector-size +.Op Fl s Ar size +.Op Fl V Ar verbose +.Op Fl v Ar volname +.Ar special +.Sh DESCRIPTION +.Nm +is used to initialize and clear Ext2 file systems before first use. +Before running +.Nm +the disk must be labeled using +.Xr disklabel 8 . +.Nm +builds a file system on the specified special device +basing its defaults on the information in the disk label. +Typically the defaults are reasonable, however +.Nm +has numerous options to allow the defaults to be selectively overridden. 
+.Pp +Options with numeric arguments may contain an optional (case-insensitive) +suffix: +.Bl -tag -width 3n -offset indent -compact +.It b +Bytes; causes no modification. +(Default) +.It k +Kilo; multiply the argument by 1024 +.It m +Mega; multiply the argument by 1048576 +.It g +Giga; multiply the argument by 1073741824 +.El +.Pp +The following options define the general layout policies. +.Bl -tag -width Fl +.It Fl b Ar block-size +The block size of the file system, in bytes. +It must be a power of two. +The smallest allowable size is 1024 bytes. +The default size depends upon the size of the file system: +.Pp +.Bl -tag -width "file system size" -compact -offset indent +.It Sy "file system size" +.Ar block-size +.It \*[Lt] 512 MB +1 KB +.It \*[Gt]= 512 MB +4 KB +.El +.It Fl F +Create a file system image in +.Ar special . +The file system size needs to be specified with +.Dq Fl s Ar size . +No attempts to use or update the disk label will be made. +.It Fl f Ar frag-size +The fragment size of the file system in bytes. +It must be the same as the block size because the current ext2fs +implementation doesn't support fragmentation. +.It Fl I +Do not require that the file system type listed in the disk label is +.Ql Linux Ext2 . +.It Fl i Ar bytes-per-inode +This specifies the density of inodes in the file system. +If fewer inodes are desired, a larger number should be used; +to create more inodes a smaller number should be given. +.It Fl m Ar free-space +The percentage of space reserved from normal users; the minimum free +space threshold. +The default value used is 5%. +.It Fl N +Causes the file system parameters to be printed out +without really creating the file system. +.It Fl n Ar inodes +This specifies the number of inodes for the file system. +If both +.Fl i +and +.Fl n +are specified then +.Fl n +takes precedence. +The default number of inodes is calculated from a number of blocks in +the file system. 
+.It Fl O Ar filesystem-format +Select the filesystem-format +.Bl -tag -width 3n -offset indent -compact +.It 0 +.Ql GOOD_OLD_REV ; +This option is primarily used to build root file systems that can be +understood by old or dumb firmwares for bootstrap. (default) +.It 1 +.Ql DYNAMIC_REV ; +Various extended (and sometimes incompatible) features are enabled +(though not all features are supported on +.Nx ) . +Currently only the following features are supported: +.Bl -tag -width "SPARSESUPER" -offset indent -compact +.It RESIZE +Prepare some reserved structures which enable future file system resizing. +.It FTYPE +Store file types in directory entries to improve performance. +.It SPARSESUPER +Prepare superblock backups for the +.Xr fsck_ext2fs 8 +utility on not all but sparse block groups. +.It LARGEFILE +Enable files larger than 2G bytes. +.El +.El +.It Fl s Ar size +The size of the file system in sectors. +An +.Sq s +suffix will be interpreted as the number of sectors (the default). +All other suffixes are interpreted as per other numeric arguments, +except that the number is converted into sectors by dividing by the +sector size (as specified by +.Fl S Ar secsize ) +after suffix interpretation. +.Pp +If no +.Fl s Ar size +is specified then the filesystem size defaults to that of the partition, or, +if +.Fl F +is specified, the existing file. +.Pp +If +.Ar size +is negative the specified size is subtracted from the default size +(reserving space at the end of the partition). +.It Fl v Ar volname +This specifies a volume name for the file system. +.It Fl V Ar verbose +This controls the amount of information written to stdout: +.Bl -tag -width 3n -offset indent -compact +.It 0 +No output +.It 1 +Overall size and cylinder group details. +.It 2 +A progress bar (dots ending at right hand margin). +.It 3 +The first few super-block backup sector numbers are displayed before the +progress bar. 
+.It 4 +All the super-block backup sector numbers are displayed (no progress bar). +.El +The default is 3. +If +.Fl N +is specified +.Nm +stops before outputting the progress bar. +.It Fl Z +Pre-zeros the file system image created with +.Fl F . +This is necessary if the image is to be used by +.Xr vnd 4 +(which doesn't support file systems with +.Sq holes ) . +.El +.Pp +The following option overrides the standard sizes for the disk geometry. +The default value is taken from the disk label. +Changing this default is useful only when using +.Nm +to build a file system whose raw image will eventually be used on a +different type of disk than the one on which it is initially created +(for example on a write-once disk). +Note that changing this value from its default will make it impossible for +.Xr fsck_ext2fs 8 +to find the alternative superblocks if the standard superblock is lost. +.Bl -tag -width Fl +.It Fl S Ar sector-size +The size of a sector in bytes (almost never anything but 512). +Defaults to 512. +.El +.Sh NOTES +There is no option to specify the metadata byte order on the file system +to be created because the native Ext2 file system is always little endian +even on big endian hosts. +.Pp +The file system is created with +.Sq random +inode generation numbers to improve NFS security. +.Pp +The owner and group ids of the root node and reserved blocks of the new +file system are set to the effective uid and gid of the user initializing +the file system. +.Pp +For the +.Nm +command to succeed, +the disk label should first be updated such that the fstype field for the +partition is set to +.Ql Linux Ext2 , +unless +.Fl F +or +.Fl I +is used. +.Pp +.\" To create and populate a filesystem image within a file use the +.\" .Xr makefs 8 +.\" utility. +.\" .Pp +The partition size is found using +.Xr fstat 2 +not by inspecting the disklabel. 
+The block size and fragment size will be written back to the disklabel +only if the last character of +.Ar special +references the same partition as the minor device number. +that provide disk like block and character devices. +.Sh SEE ALSO +.Xr fstat 2 , +.Xr disklabel 5 , +.Xr disktab 5 , +.Xr fs 5 , +.Xr disklabel 8 , +.Xr diskpart 8 , +.\" .Xr dumpfs 8 , +.\" .Xr format 8 , +.Xr fsck_ext2fs 8 , +.\" .Xr makefs 8 , +.Xr mount 8 , +.Xr mount_ext2fs 8 , +.Xr newfs 8 , +.Rs +.%A Remy Card +.%A Theodore Ts'o +.%A Stephen Tweedie +.%T "Design and Implementation of the Second Extended Filesystem" +.%J "The Proceedings of the First Dutch International Symposium on Linux" +.Lk http://e2fsprogs.sourceforge.net/ext2intro.html +.Re +.Sh HISTORY +The +.Nm +command first appeared in +.Nx 5.0 . +.Sh AUTHORS +The +.Nm +command was written by +.An Izumi Tsutsui +.Aq tsutsui@NetBSD.org . +.Sh BUGS +The +.Nm +command is still experimental and there are few sanity checks. +.Pp +The +.Nm +command doesn't have options to specify each REV1 file system feature +independently. +.Pp +The +.Nm +command doesn't support the bad block list accounted by the bad blocks inode. +.Pp +Many newer Ext2 file system features (especially journaling) are +not supported yet. +.Pp +Some features in file systems created by the +.Nm +command might not be recognized properly by the +.Xr fsck_ext2fs 8 +utility. +.Pp +There is no native tool in the +.Nx +distribution for resizing Ext2 file systems yet. diff --git a/sbin/newfs_ext2fs/newfs_ext2fs.c b/sbin/newfs_ext2fs/newfs_ext2fs.c new file mode 100644 index 000000000000..1eaafa274453 --- /dev/null +++ b/sbin/newfs_ext2fs/newfs_ext2fs.c @@ -0,0 +1,477 @@ +/* $NetBSD: newfs_ext2fs.c,v 1.1 2007/11/17 16:50:26 tsutsui Exp $ */ + +/* + * Copyright (c) 1983, 1989, 1993, 1994 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1983, 1989, 1993, 1994\n\ + The Regents of the University of California. 
All rights reserved.\n"); +#endif /* not lint */ + +#ifndef lint +#if 0 +static char sccsid[] = "@(#)newfs.c 8.13 (Berkeley) 5/1/95"; +#else +__RCSID("$NetBSD: newfs_ext2fs.c,v 1.1 2007/11/17 16:50:26 tsutsui Exp $"); +#endif +#endif /* not lint */ + +/* + * newfs: friendly front end to mke2fs + */ +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "extern.h" +#include "partutil.h" + +static int64_t strsuftoi64(const char *, const char *, int64_t, int64_t, int *); +static void usage(void) __attribute__((__noreturn__)); + +/* + * For file systems smaller than SMALL_FSSIZE we use the S_DFL_* defaults, + * otherwise if less than MEDIUM_FSSIZE use M_DFL_*, otherwise use + * L_DFL_*. + */ +#define SMALL_FSSIZE ((4 * 1024 * 1024) / sectorsize) /* 4MB */ +#define S_DFL_BSIZE 1024 +#define MEDIUM_FSSIZE ((512 * 1024 * 1024) / sectorsize) /* 512MB */ +#define M_DFL_BSIZE 1024 +#define L_DFL_BSIZE 4096 + +/* + * Each file system has a number of inodes statically allocated. + * We allocate one inode slot per 2, 4, or 8 blocks, expecting this + * to be far more than we will ever need. + */ +#define S_DFL_NINODE(blocks) ((blocks) / 8) +#define M_DFL_NINODE(blocks) ((blocks) / 4) +#define L_DFL_NINODE(blocks) ((blocks) / 2) + +/* + * Default sector size. 
+ */ +#define DFL_SECSIZE 512 + +int Nflag; /* run without writing file system */ +int Oflag = 0; /* format as conservative REV0 by default */ +int verbosity; /* amount of printf() output */ +#define DEFAULT_VERBOSITY 3 /* 4 is traditional behavior of newfs(8) */ +int64_t fssize; /* file system size */ +uint sectorsize; /* bytes/sector */ +uint fsize = 0; /* fragment size */ +uint bsize = 0; /* block size */ +uint minfree = MINFREE; /* free space threshold */ +uint density; /* number of bytes per inode */ +uint num_inodes; /* number of inodes (overrides density) */ +char *volname = NULL; /* volume name */ + +static char *disktype = NULL; +static char device[MAXPATHLEN]; + +static const char lmsg[] = "%s: can't read disk label"; + +int +main(int argc, char *argv[]) +{ + struct disk_geom geo; + struct dkwedge_info dkw; + struct statvfs *mp; + struct stat sb; + int ch, fsi, fso, len, n, Fflag, Iflag, Zflag; + char *cp, *s1, *s2, *special; + const char *opstring; + int byte_sized; + uint blocks; /* number of blocks */ + + cp = NULL; + fsi = fso = -1; + Fflag = Iflag = Zflag = 0; + verbosity = -1; + opstring = "FINO:S:V:Zb:f:i:l:m:n:s:v:"; + byte_sized = 0; + while ((ch = getopt(argc, argv, opstring)) != -1) + switch (ch) { + case 'F': + Fflag = 1; + break; + case 'I': + Iflag = 1; + break; + case 'N': + Nflag = 1; + if (verbosity == -1) + verbosity = DEFAULT_VERBOSITY; + break; + case 'O': + Oflag = strsuftoi64("format", optarg, 0, 1, NULL); + break; + case 'S': + /* + * XXX: + * non-512 byte sectors almost certainly don't work. 
+ */ + sectorsize = strsuftoi64("sector size", + optarg, 512, 65536, NULL); + if (!powerof2(sectorsize)) + errx(EXIT_FAILURE, + "sector size `%s' is not a power of 2.", + optarg); + break; + case 'V': + verbosity = strsuftoi64("verbose", optarg, 0, 4, NULL); + break; + case 'Z': + Zflag = 1; + break; + case 'b': + bsize = strsuftoi64("block size", + optarg, MINBSIZE, MAXBSIZE, NULL); + break; + case 'f': + fsize = strsuftoi64("fragment size", + optarg, 1, MAXBSIZE, NULL); + break; + case 'i': + density = strsuftoi64("bytes per inode", + optarg, 1, INT_MAX, NULL); + break; + case 'm': + minfree = strsuftoi64("free space %", + optarg, 0, 99, NULL); + break; + case 'n': + num_inodes = strsuftoi64("number of inodes", + optarg, 1, INT_MAX, NULL); + break; + case 's': + fssize = strsuftoi64("file system size", + optarg, INT64_MIN, INT64_MAX, &byte_sized); + break; + case 'v': + volname = optarg; + if (volname[0] == '\0') + errx(EXIT_FAILURE, + "Volume name cannot be zero length"); + break; + case '?': + default: + usage(); + } + argc -= optind; + argv += optind; + + if (verbosity == -1) + /* Default to showing cg info */ + verbosity = DEFAULT_VERBOSITY; + + if (argc != 1) + usage(); + + memset(&sb, 0, sizeof(sb)); + memset(&dkw, 0, sizeof(dkw)); + special = argv[0]; + if (Fflag) { + int fl; + /* + * It's a file system image + * no label, use fixed default for sectorsize. 
+ */ + if (sectorsize == 0) + sectorsize = DFL_SECSIZE; + + /* creating image in a regular file */ + if (Nflag) + fl = O_RDONLY; + else { + if (fssize > 0) + fl = O_RDWR | O_CREAT; + else + fl = O_RDWR; + } + fsi = open(special, fl, 0777); + if (fsi == -1) + err(EXIT_FAILURE, "can't open file %s", special); + if (fstat(fsi, &sb) == -1) + err(EXIT_FAILURE, "can't fstat opened %s", special); + if (!Nflag) + fso = fsi; + } else { /* !Fflag */ + fsi = opendisk(special, O_RDONLY, device, sizeof(device), 0); + special = device; + if (fsi < 0 || fstat(fsi, &sb) == -1) + err(EXIT_FAILURE, "%s: open for read", special); + + if (!Nflag) { + fso = open(special, O_WRONLY, 0); + if (fso < 0) + err(EXIT_FAILURE, + "%s: open for write", special); + + /* Bail if target special is mounted */ + n = getmntinfo(&mp, MNT_NOWAIT); + if (n == 0) + err(EXIT_FAILURE, "%s: getmntinfo", special); + + len = sizeof(_PATH_DEV) - 1; + s1 = special; + if (strncmp(_PATH_DEV, s1, len) == 0) + s1 += len; + + while (--n >= 0) { + s2 = mp->f_mntfromname; + if (strncmp(_PATH_DEV, s2, len) == 0) { + s2 += len - 1; + *s2 = 'r'; + } + if (strcmp(s1, s2) == 0 || + strcmp(s1, &s2[1]) == 0) + errx(EXIT_FAILURE, + "%s is mounted on %s", + special, mp->f_mntonname); + ++mp; + } + } + + if (getdiskinfo(special, fsi, disktype, &geo, &dkw) == -1) + errx(EXIT_FAILURE, lmsg, special); + + if (sectorsize == 0) { + sectorsize = geo.dg_secsize; + if (sectorsize <= 0) + errx(EXIT_FAILURE, "no default sector size"); + } + + if (dkw.dkw_parent[0]) { + if (dkw.dkw_size == 0) + errx(EXIT_FAILURE, + "%s partition is unavailable", special); + + if (!Iflag) { + static const char m[] = + "%s partition type is not `%s' (or use -I)"; + if (strcmp(dkw.dkw_ptype, DKW_PTYPE_EXT2FS)) + errx(EXIT_FAILURE, m, + special, "Linux Ext2"); + } + } + } + + if (byte_sized) + fssize /= sectorsize; + if (fssize <= 0) { + if (sb.st_size != 0) + fssize += sb.st_size / sectorsize; + else + fssize += dkw.dkw_size; + if (fssize <= 0) + 
errx(EXIT_FAILURE, + "Unable to determine file system size"); + } + + if (dkw.dkw_parent[0] && fssize > dkw.dkw_size) + errx(EXIT_FAILURE, + "size %" PRIu64 " exceeds maximum file system size on " + "`%s' of %" PRIu64 " sectors", + fssize, special, dkw.dkw_size); + + /* XXXLUKEM: only ftruncate() regular files ? (dsl: or at all?) */ + if (Fflag && fso != -1 + && ftruncate(fso, (off_t)fssize * sectorsize) == -1) + err(1, "can't ftruncate %s to %" PRId64, special, fssize); + + if (Zflag && fso != -1) { /* pre-zero (and de-sparce) the file */ + char *buf; + int bufsize, i; + off_t bufrem; + struct statvfs sfs; + + if (fstatvfs(fso, &sfs) == -1) { + warn("can't fstatvfs `%s'", special); + bufsize = 8192; + } else + bufsize = sfs.f_iosize; + + if ((buf = calloc(1, bufsize)) == NULL) + err(1, "can't malloc buffer of %d", + bufsize); + bufrem = fssize * sectorsize; + if (verbosity > 0) + printf("Creating file system image in `%s', " + "size %" PRId64 " bytes, in %d byte chunks.\n", + special, bufrem, bufsize); + while (bufrem > 0) { + i = write(fso, buf, MIN(bufsize, bufrem)); + if (i == -1) + err(1, "writing image"); + bufrem -= i; + } + free(buf); + } + + /* Sort out fragment and block sizes */ + if (bsize == 0) { + bsize = fsize; + if (bsize == 0) { + if (fssize < SMALL_FSSIZE) + bsize = S_DFL_BSIZE; + else if (fssize < MEDIUM_FSSIZE) + bsize = M_DFL_BSIZE; + else + bsize = L_DFL_BSIZE; + } + } + if (fsize == 0) + fsize = bsize; + + blocks = fssize * sectorsize / bsize; + + if (num_inodes == 0) { + if (density != 0) + num_inodes = fssize / density; + else { + if (fssize < SMALL_FSSIZE) + num_inodes = S_DFL_NINODE(blocks); + else if (fssize < MEDIUM_FSSIZE) + num_inodes = M_DFL_NINODE(blocks); + else + num_inodes = L_DFL_NINODE(blocks); + } + } + mke2fs(special, fsi, fso); + + if (fsi != -1) + close(fsi); + if (fso != -1 && fso != fsi) + close(fso); + exit(EXIT_SUCCESS); +}

/*
 * strsuftoi64 --
 *	Parse a decimal number with an optional one-character suffix.
 *
 *	desc        name of the quantity; used only in diagnostics
 *	arg         text to parse
 *	min, max    inclusive bounds applied to the scaled result
 *	num_suffix  if not NULL, set to 0 for a bare number or an `s'
 *	            suffix, and to 1 for `b', `k', `m' or `g'
 *
 *	Suffixes are accepted in either case.  `k', `m' and `g' scale the
 *	number by 2^10, 2^20 and 2^30; `b' and `s' do not scale.  Every
 *	failure is reported through errx(EXIT_FAILURE, ...), so the
 *	function only returns a value that lies within [min, max].
 */
static int64_t
strsuftoi64(const char *desc, const char *arg, int64_t min, int64_t max,
    int *num_suffix)
{
	int64_t result, r1, scale;
	int shift = 0;
	char *ep;

	errno = 0;
	r1 = strtoll(arg, &ep, 10);
	/*
	 * strtoll() leaves ep == arg when it converted nothing; without
	 * that test an empty string or a bare suffix such as "k" would be
	 * accepted as 0.  At most one suffix character may follow the
	 * digits.
	 */
	if (ep == arg || (ep[0] != '\0' && ep[1] != '\0'))
		errx(EXIT_FAILURE,
		    "%s `%s' is not a valid number.", desc, arg);
	switch (ep[0]) {
	case '\0':
	case 's':
	case 'S':
		if (num_suffix != NULL)
			*num_suffix = 0;
		break;
	case 'g':
	case 'G':
		shift += 10;
		/* FALLTHROUGH */
	case 'm':
	case 'M':
		shift += 10;
		/* FALLTHROUGH */
	case 'k':
	case 'K':
		shift += 10;
		/* FALLTHROUGH */
	case 'b':
	case 'B':
		if (num_suffix != NULL)
			*num_suffix = 1;
		break;
	default:
		errx(EXIT_FAILURE,
		    "`%s' is not a valid suffix for %s.", ep, desc);
	}

	/*
	 * Range-check before scaling and scale by multiplication:
	 * left-shifting a negative or overflowing signed value is
	 * undefined, and negative input is legitimate here.  shift is at
	 * most 30, so the scale factor itself always fits.
	 */
	scale = (int64_t)1 << shift;
	if (errno == ERANGE ||
	    r1 > INT64_MAX / scale || r1 < INT64_MIN / scale)
		errx(EXIT_FAILURE,
		    "%s `%s' is too large to convert.", desc, arg);
	result = r1 * scale;

	if (result < min)
		errx(EXIT_FAILURE,
		    "%s `%s' (%" PRId64 ") is less than the minimum (%"
		    PRId64 ").", desc, arg, result, min);
	if (result > max)
		errx(EXIT_FAILURE,
		    "%s `%s' (%" PRId64 ") is greater than the maximum (%"
		    PRId64 ").", desc, arg, result, max);
	return result;
}

/* Option summary printed by usage(), one option per line. */
static const char help_strings[] =
	"\t-F \t\tcreate file system image in regular file\n"
	"\t-I \t\tdo not check that the file system type is `Linux Ext2'\n"
	"\t-N \t\tdo not create file system, just print out parameters\n"
	"\t-O N\t\tfilesystem revision: 0 ==> REV0, 1 ==> REV1 (default 0)\n"
	"\t-S secsize\tsector size\n"
	"\t-V verbose\toutput verbosity: 0 ==> none, 4 ==> max\n"
	"\t-Z \t\tpre-zero the image file\n"
	"\t-b bsize\tblock size\n"
	"\t-f fsize\tfragment size\n"
	"\t-i density\tnumber of bytes per inode\n"
	"\t-m minfree\tminimum free space %\n"
	"\t-n inodes\tnumber of inodes (overrides -i density)\n"
	"\t-s fssize\tfile system size (sectors)\n"
	"\t-v volname\text2fs volume name\n";

/*
 * usage --
 *	Write the synopsis and the option summary to stderr, then exit
 *	with EXIT_FAILURE.  Does not return.
 */
static void
usage(void)
{

	fprintf(stderr,
	    "usage: %s [ fsoptions ] special-device\n", getprogname());
	fprintf(stderr, "where fsoptions are:\n");
	fprintf(stderr, "%s", help_strings);

	exit(EXIT_FAILURE);
}