372 lines
14 KiB
Groff
372 lines
14 KiB
Groff
.\" $NetBSD: fs.5,v 1.18 2010/03/22 18:58:32 joerg Exp $
|
|
.\"
|
|
.\" Copyright (c) 1983, 1991, 1993
|
|
.\" The Regents of the University of California. All rights reserved.
|
|
.\"
|
|
.\" Redistribution and use in source and binary forms, with or without
|
|
.\" modification, are permitted provided that the following conditions
|
|
.\" are met:
|
|
.\" 1. Redistributions of source code must retain the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer.
|
|
.\" 2. Redistributions in binary form must reproduce the above copyright
|
|
.\" notice, this list of conditions and the following disclaimer in the
|
|
.\" documentation and/or other materials provided with the distribution.
|
|
.\" 3. Neither the name of the University nor the names of its contributors
|
|
.\" may be used to endorse or promote products derived from this software
|
|
.\" without specific prior written permission.
|
|
.\"
|
|
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
.\" SUCH DAMAGE.
|
|
.\"
|
|
.\" @(#)fs.5 8.2 (Berkeley) 4/19/94
|
|
.\"
|
|
.Dd July 27, 2001
|
|
.Dt FS 5
|
|
.Os
|
|
.Sh NAME
|
|
.Nm fs ,
|
|
.Nm inode
|
|
.Nd format of file system volume
|
|
.Sh SYNOPSIS
|
|
.In sys/param.h
|
|
.In ufs/ffs/fs.h
|
|
.In ufs/ufs/inode.h
|
|
.Sh DESCRIPTION
|
|
The files
|
|
.In ufs/ffs/fs.h
|
|
and
|
|
.In ufs/ufs/inode.h
|
|
declare several structures and define variables and macros
|
|
which are used to create and manage the underlying format of
|
|
file system objects on random access devices (disks).
|
|
.Pp
|
|
The block size and number of blocks which
|
|
comprise a file system are parameters of the file system.
|
|
Sectors beginning at
|
|
.Dv BBLOCK
|
|
and continuing for
|
|
.Dv BBSIZE
|
|
are used
|
|
for a disklabel and for some hardware primary
|
|
and secondary bootstrapping programs.
|
|
.Pp
|
|
The actual file system begins at sector
|
|
.Dv SBLOCK
|
|
with the
|
|
.Em super-block
|
|
that is of size
|
|
.Dv SBSIZE .
|
|
The following structure describes the super-block and is
|
|
from the file
|
|
.In ufs/ffs/fs.h :
|
|
.Bd -literal
|
|
#define FS_MAGIC 0x011954
|
|
struct fs {
|
|
int32_t fs_firstfield; /* historic file system linked list, */
|
|
int32_t fs_unused_1; /* used for incore super blocks */
|
|
int32_t fs_sblkno; /* addr of super-block in filesys */
|
|
int32_t fs_cblkno; /* offset of cyl-block in filesys */
|
|
int32_t fs_iblkno; /* offset of inode-blocks in filesys */
|
|
int32_t fs_dblkno; /* offset of first data after cg */
|
|
int32_t fs_old_cgoffset; /* cylinder group offset in cylinder */
|
|
int32_t fs_old_cgmask; /* used to calc mod fs_ntrak */
|
|
int32_t fs_old_time; /* last time written */
|
|
int32_t fs_old_size; /* number of blocks in fs */
|
|
int32_t fs_old_dsize; /* number of data blocks in fs */
|
|
int32_t fs_ncg; /* number of cylinder groups */
|
|
int32_t fs_bsize; /* size of basic blocks in fs */
|
|
int32_t fs_fsize; /* size of frag blocks in fs */
|
|
int32_t fs_frag; /* number of frags in a block in fs */
|
|
/* these are configuration parameters */
|
|
int32_t fs_minfree; /* minimum percentage of free blocks */
|
|
int32_t fs_old_rotdelay; /* num of ms for optimal next block */
|
|
int32_t fs_old_rps; /* disk revolutions per second */
|
|
/* these fields can be computed from the others */
|
|
int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */
|
|
int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */
|
|
int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */
|
|
int32_t fs_fshift; /* ``numfrags'' calc number of frags */
|
|
/* these are configuration parameters */
|
|
int32_t fs_maxcontig; /* max number of contiguous blks */
|
|
int32_t fs_maxbpg; /* max number of blks per cyl group */
|
|
/* these fields can be computed from the others */
|
|
int32_t fs_fragshift; /* block to frag shift */
|
|
int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */
|
|
int32_t fs_sbsize; /* actual size of super block */
|
|
int32_t fs_spare1[2]; /* old fs_csmask */
|
|
/* old fs_csshift */
|
|
int32_t fs_nindir; /* value of NINDIR */
|
|
int32_t fs_inopb; /* value of INOPB */
|
|
int32_t fs_old_nspf; /* value of NSPF */
|
|
/* yet another configuration parameter */
|
|
int32_t fs_optim; /* optimization preference, see below */
|
|
/* these fields are derived from the hardware */
|
|
int32_t fs_old_npsect; /* # sectors/track including spares */
|
|
int32_t fs_old_interleave; /* hardware sector interleave */
|
|
int32_t fs_old_trackskew; /* sector 0 skew, per track */
|
|
/* fs_id takes the space of unused fs_headswitch and fs_trkseek fields */
|
|
int32_t fs_id[2]; /* unique file system id */
|
|
/* sizes determined by number of cylinder groups and their sizes */
|
|
int32_t fs_old_csaddr; /* blk addr of cyl grp summary area */
|
|
int32_t fs_cssize; /* size of cyl grp summary area */
|
|
int32_t fs_cgsize; /* cylinder group size */
|
|
/* these fields are derived from the hardware */
|
|
int32_t fs_spare2; /* old fs_ntrak */
|
|
int32_t fs_old_nsect; /* sectors per track */
|
|
int32_t fs_old_spc; /* sectors per cylinder */
|
|
int32_t fs_old_ncyl; /* cylinders in file system */
|
|
int32_t fs_old_cpg; /* cylinders per group */
|
|
int32_t fs_ipg; /* inodes per group */
|
|
int32_t fs_fpg; /* blocks per group * fs_frag */
|
|
/* this data must be re-computed after crashes */
|
|
struct csum fs_old_cstotal; /* cylinder summary information */
|
|
/* these fields are cleared at mount time */
|
|
int8_t fs_fmod; /* super block modified flag */
|
|
int8_t fs_clean; /* file system is clean flag */
|
|
int8_t fs_ronly; /* mounted read-only flag */
|
|
uint8_t fs_old_flags; /* see FS_ flags below */
|
|
u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */
|
|
u_char fs_volname[MAXVOLLEN]; /* volume name */
|
|
uint64_t fs_swuid; /* system-wide uid */
|
|
int32_t fs_pad;
|
|
/* these fields retain the current block allocation info */
|
|
int32_t fs_cgrotor; /* last cg searched (UNUSED) */
|
|
void *fs_ocsp[NOCSPTRS];/* padding; was list of fs_cs buffers */
|
|
uint8_t *fs_contigdirs; /* # of contiguously allocated dirs */
|
|
struct csum *fs_csp; /* cg summary info buffer for fs_cs */
|
|
int32_t *fs_maxcluster; /* max cluster in each cyl group */
|
|
u_char *fs_active; /* used by snapshots to track fs */
|
|
int32_t fs_old_cpc; /* cyl per cycle in postbl */
|
|
/* this area is otherwise allocated unless fs_old_flags & FS_FLAGS_UPDATED */
|
|
int32_t fs_maxbsize; /* maximum blocking factor permitted */
|
|
int64_t fs_sparecon64[17]; /* old rotation block list head */
|
|
int64_t fs_sblockloc; /* byte offset of standard superblock */
|
|
struct csum_total fs_cstotal; /* cylinder summary information */
|
|
int64_t fs_time; /* last time written */
|
|
int64_t fs_size; /* number of blocks in fs */
|
|
int64_t fs_dsize; /* number of data blocks in fs */
|
|
int64_t fs_csaddr; /* blk addr of cyl grp summary area */
|
|
int64_t fs_pendingblocks; /* blocks in process of being freed */
|
|
int32_t fs_pendinginodes; /* inodes in process of being freed */
|
|
int32_t fs_snapinum[FSMAXSNAP]; /* list of snapshot inode numbers */
|
|
/* back to stuff that has been around a while */
|
|
int32_t fs_avgfilesize; /* expected average file size */
|
|
int32_t fs_avgfpdir; /* expected # of files per directory */
|
|
int32_t fs_save_cgsize; /* save real cg size to use fs_bsize */
|
|
int32_t fs_sparecon32[26]; /* reserved for future constants */
|
|
uint32_t fs_flags; /* see FS_ flags below */
|
|
/* back to stuff that has been around a while (again) */
|
|
int32_t fs_contigsumsize; /* size of cluster summary array */
|
|
int32_t fs_maxsymlinklen; /* max length of an internal symlink */
|
|
int32_t fs_old_inodefmt; /* format of on-disk inodes */
|
|
uint64_t fs_maxfilesize; /* maximum representable file size */
|
|
int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */
|
|
int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */
|
|
int32_t fs_state; /* validate fs_clean field (UNUSED) */
|
|
int32_t fs_old_postblformat; /* format of positional layout tables */
|
|
int32_t fs_old_nrpos; /* number of rotational positions */
|
|
int32_t fs_spare5[2]; /* old fs_postbloff */
|
|
/* old fs_rotbloff */
|
|
int32_t fs_magic; /* magic number */
|
|
};
|
|
.Ed
|
|
.Pp
|
|
Each disk drive contains some number of file systems.
|
|
A file system consists of a number of cylinder groups.
|
|
Each cylinder group has inodes and data.
|
|
.Pp
|
|
A file system is described by its super-block, which in turn
|
|
describes the cylinder groups.
|
|
The super-block is critical data and is replicated in each cylinder
|
|
group to protect against catastrophic loss.
|
|
This is done at file system creation time and the critical super-block
|
|
data does not change, so the copies need not be referenced further
|
|
unless disaster strikes.
|
|
.Pp
|
|
Addresses stored in inodes are capable of addressing fragments
|
|
of `blocks'.
|
|
File system blocks of at most size
|
|
.Dv MAXBSIZE
|
|
can
|
|
be optionally broken into 2, 4, or 8 pieces, each of which is
|
|
addressable; these pieces may be
|
|
.Dv DEV_BSIZE ,
|
|
or some multiple of
|
|
a
|
|
.Dv DEV_BSIZE
|
|
unit.
|
|
.Pp
|
|
Large files consist exclusively of large data blocks.
|
|
To avoid undue wasted disk space, the last data block of a small
|
|
file is allocated as only as many fragments of a large block as
|
|
are necessary.
|
|
The file system format retains only a single pointer to such a
|
|
fragment, which is a piece of a single large block that has been divided.
|
|
The size of such a fragment is determinable from
|
|
information in the inode, using the
|
|
.Fn blksize fs ip lbn
|
|
macro.
|
|
.Pp
|
|
The file system records space availability at the fragment level;
|
|
to determine block availability, aligned fragments are examined.
|
|
.Pp
|
|
The root inode is the root of the file system.
|
|
Inode 0 can't be used for normal purposes and
|
|
historically bad blocks were linked to inode 1,
|
|
thus the root inode is 2 (inode 1 is no longer used for
|
|
this purpose, however numerous dump tapes make this
|
|
assumption, so we are stuck with it).
|
|
.Pp
|
|
The
|
|
.Fa fs_minfree
|
|
element gives the minimum acceptable percentage of file system
|
|
blocks that may be free.
|
|
If the freelist drops below this level
|
|
only the super-user may continue to allocate blocks.
|
|
The
|
|
.Fa fs_minfree
|
|
element
|
|
may be set to 0 if no reserve of free blocks is deemed necessary,
|
|
however severe performance degradations will be observed if the
|
|
file system is run at greater than 90% full; thus the default
|
|
value of
|
|
.Fa fs_minfree
|
|
is 10%.
|
|
.Pp
|
|
Empirically the best trade-off between block fragmentation and
|
|
overall disk utilization at a loading of 90% comes with a
|
|
fragmentation of 8, thus the default fragment size is an eighth
|
|
of the block size.
|
|
.Pp
|
|
The element
|
|
.Fa fs_optim
|
|
specifies whether the file system should try to minimize the time spent
|
|
allocating blocks, or if it should attempt to minimize the space
|
|
fragmentation on the disk.
|
|
If the value of fs_minfree (see above) is less than 10%,
|
|
then the file system defaults to optimizing for space to avoid
|
|
running out of full sized blocks.
|
|
If the value of fs_minfree is greater than or equal to 10%,
|
|
fragmentation is unlikely to be problematical, and
|
|
the file system defaults to optimizing for time.
|
|
.Pp
|
|
.Em Cylinder group related limits :
|
|
Each cylinder keeps track of the availability of blocks at different
|
|
rotational positions, so that sequential blocks can be laid out
|
|
with minimum rotational latency.
|
|
With the default of 8 distinguished
|
|
rotational positions, the resolution of the
|
|
summary information is 2ms for a typical 3600 rpm drive.
|
|
.Pp
|
|
The element
|
|
.Fa fs_old_rotdelay
|
|
gives the minimum number of milliseconds to initiate
|
|
another disk transfer on the same cylinder.
|
|
It is used in determining the rotationally optimal
|
|
layout for disk blocks within a file;
|
|
the default value for
|
|
.Fa fs_old_rotdelay
|
|
is 2ms.
|
|
.Pp
|
|
Each file system has a statically allocated number of inodes,
|
|
determined by its size and the desired number of file data bytes per
|
|
inode at the time it was created. See
|
|
.Xr newfs 8
|
|
for details on how to set this (and other) file system parameters.
|
|
By default, the inode allocation strategy is extremely conservative.
|
|
.Pp
|
|
.Dv MINBSIZE
|
|
is the smallest allowable block size.
|
|
With a
|
|
.Dv MINBSIZE
|
|
of 4096
|
|
it is possible to create files of size
|
|
2^32 with only two levels of indirection.
|
|
.Dv MINBSIZE
|
|
must be big enough to hold a cylinder group block,
|
|
thus changes to
|
|
.Pq Fa struct cg
|
|
must keep its size within
|
|
.Dv MINBSIZE .
|
|
Note that super-blocks are never more than size
|
|
.Dv SBSIZE .
|
|
.Pp
|
|
The path name on which the file system is mounted is maintained in
|
|
.Fa fs_fsmnt .
|
|
.Dv MAXMNTLEN
|
|
defines the amount of space allocated in
|
|
the super-block for this name.
|
|
The limit on the amount of summary information per file system
|
|
is defined by
|
|
.Dv MAXCSBUFS .
|
|
For a 4096 byte block size, it is currently parameterized for a
|
|
maximum of two million cylinders.
|
|
.Pp
|
|
Per cylinder group information is summarized in blocks allocated
|
|
from the first cylinder group's data blocks.
|
|
These blocks are read in from
|
|
.Fa fs_csaddr
|
|
(size
|
|
.Fa fs_cssize )
|
|
in addition to the super-block.
|
|
.Pp
|
|
.Sy N.B.:
|
|
.Fn sizeof "struct csum"
|
|
must be a power of two in order for
|
|
the
|
|
.Fn fs_cs
|
|
macro to work.
|
|
.Pp
|
|
The
|
|
.Em "Super-block for a file system" :
|
|
The size of the rotational layout tables
|
|
is limited by the fact that the super-block is of size
|
|
.Dv SBSIZE .
|
|
The size of these tables is
|
|
.Em inversely
|
|
proportional to the block size of the file system.
|
|
The size of the tables is increased when sector sizes are not powers
|
|
of two, as this increases the number of cylinders included before
|
|
the rotational pattern repeats
|
|
.Pq Fa fs_old_cpc .
|
|
The size of the rotational layout
|
|
tables is derived from the number of bytes remaining in
|
|
.Pq Fa struct fs .
|
|
.Pp
|
|
The number of blocks of data per cylinder group
|
|
is limited because cylinder groups are at most one block.
|
|
The inode and free block tables
|
|
must fit into a single block after deducting space for
|
|
the cylinder group structure
|
|
.Pq Fa struct cg .
|
|
.Pp
|
|
The
|
|
.Em Inode :
|
|
The inode is the focus of all file activity in the
|
|
.Ux
|
|
file system.
|
|
There is a unique inode allocated
|
|
for each active file,
|
|
each current directory, each mounted-on file,
|
|
text file, and the root.
|
|
An inode is `named' by its device/i-number pair.
|
|
For further information, see the include file
|
|
.In ufs/ufs/inode.h .
|
|
.Sh SEE ALSO
|
|
.Xr newfs 8
|
|
.Sh HISTORY
|
|
A super-block structure named filsys appeared in
|
|
.At v6 .
|
|
The file system described in this manual appeared
|
|
in
|
|
.Bx 4.2 .
|