Add user-level manifestations of posix_fallocate and fdiscard, in
unistd.h and libc, and add a man page.

Allow wiggle room in the man page for implementations of fallocate
that either (a) don't fully unwind on failure, leaving new blocks
allocated without changing the file size, or (b) create only in-memory
transient reservations that disappear when crashing or rebooting.

Also, add a cross-reference to fdiscard from ftruncate(2), and remove the
old BUGS entry from there that called for a generalized version that
allows discarding ranges of a file: that's what fdiscard is.
This commit is contained in:
dholland 2014-07-25 08:30:47 +00:00
parent 53d9b9366b
commit d0352e761c
4 changed files with 179 additions and 12 deletions

View File

@ -1,4 +1,4 @@
/* $NetBSD: unistd.h,v 1.141 2014/06/18 17:48:22 christos Exp $ */
/* $NetBSD: unistd.h,v 1.142 2014/07/25 08:30:47 dholland Exp $ */
/*-
* Copyright (c) 1998, 1999, 2008 The NetBSD Foundation, Inc.
@ -173,6 +173,7 @@ ssize_t readlink(const char * __restrict, char * __restrict, size_t);
*/
#if (_POSIX_C_SOURCE - 0) >= 200112L || (_XOPEN_SOURCE - 0) >= 600 || \
defined(_NETBSD_SOURCE)
int posix_fallocate(int, off_t, off_t);
int setegid(gid_t);
int seteuid(uid_t);
#endif
@ -328,6 +329,7 @@ int dup3(int, int, int);
void endusershell(void);
int exect(const char *, char * const *, char * const *);
int fchroot(int);
int fdiscard(int, off_t, off_t);
int fsync_range(int, int, off_t, off_t);
int getdomainname(char *, size_t);
int getgrouplist(const char *, gid_t, gid_t *, int *);

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile.inc,v 1.223 2014/01/31 20:45:49 christos Exp $
# $NetBSD: Makefile.inc,v 1.224 2014/07/25 08:30:47 dholland Exp $
# @(#)Makefile.inc 8.3 (Berkeley) 10/24/94
# sys sources
@ -147,10 +147,11 @@ ASM_MD= _lwp_getprivate.S mremap.S
.endfor
WEAKASM= accept.S __aio_suspend50.S close.S connect.S execve.S \
fcntl.S fdatasync.S fsync.S fsync_range.S __kevent50.S \
fcntl.S fdatasync.S fdiscard.S fsync.S \
fsync_range.S __kevent50.S \
kill.S mq_receive.S mq_send.S __mq_timedreceive50.S __mq_timedsend50.S \
msgrcv.S msgsnd.S __msync13.S __nanosleep50.S open.S poll.S \
__pollts50.S __pselect50.S read.S readlink.S \
__pollts50.S posix_fallocate.S __pselect50.S read.S readlink.S \
readv.S _sched_setparam.S _sched_getparam.S _sched_setaffinity.S \
_sched_getaffinity.S sched_yield.S \
__select50.S setcontext.S __sigprocmask14.S __sigsuspend14.S sysarch.S \
@ -216,7 +217,7 @@ LintSysPseudoNoerr.c: ${LIBCDIR}/sys/makelintstub \
MAN+= accept.2 access.2 acct.2 adjtime.2 bind.2 brk.2 chdir.2 \
chflags.2 chmod.2 chown.2 chroot.2 clock_settime.2 clone.2 close.2 \
connect.2 dup.2 execve.2 _exit.2 extattr_get_file.2 \
fcntl.2 fdatasync.2 fhopen.2 \
fcntl.2 fdatasync.2 fdiscard.2 fhopen.2 \
flock.2 fork.2 fsync.2 getcontext.2 getdents.2 \
getfh.2 getvfsstat.2 getgid.2 getgroups.2 \
getitimer.2 getlogin.2 getpeername.2 getpgrp.2 getpid.2 \
@ -235,7 +236,8 @@ MAN+= accept.2 access.2 acct.2 adjtime.2 bind.2 brk.2 chdir.2 \
mount.2 \
mprotect.2 mremap.2 msgctl.2 msgget.2 msgrcv.2 msgsnd.2 msync.2 \
munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 pathconf.2 pipe.2 \
pmc_control.2 poll.2 posix_fadvise.2 profil.2 ptrace.2 __quotactl.2 \
pmc_control.2 poll.2 posix_fadvise.2 \
profil.2 ptrace.2 __quotactl.2 \
rasctl.2 read.2 readlink.2 \
reboot.2 recv.2 rename.2 revoke.2 rmdir.2 \
select.2 semctl.2 \
@ -275,6 +277,7 @@ MLINKS+=extattr_get_file.2 extattr_set_file.2 \
MLINKS+=access.2 faccessat.2
MLINKS+=chmod.2 fchmodat.2
MLINKS+=chown.2 fchownat.2
MLINKS+=fdiscard.2 posix_fallocate.2
MLINKS+=fhopen.2 fhstat.2 fhopen.2 fhstatvfs.2 fhopen.2 fhstatvfs1.2
MLINKS+=stat.2 fstatat.2
MLINKS+=fsync.2 fsync_range.2

164
lib/libc/sys/fdiscard.2 Normal file
View File

@ -0,0 +1,164 @@
.\" $NetBSD: fdiscard.2,v 1.1 2014/07/25 08:30:47 dholland Exp $
.\"
.\" Copyright (c) 2014 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" This code is derived from software contributed to The NetBSD Foundation
.\" by David A. Holland.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd July 25, 2014
.Dt FDISCARD 2
.Os
.Sh NAME
.Nm posix_fallocate ,
.Nm fdiscard
.Nd allocate or discard backing store for files
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In unistd.h
.Ft int
.Fn posix_fallocate "int fd" "off_t pos" "off_t length"
.Ft int
.Fn fdiscard "int fd" "off_t pos" "off_t length"
.Sh DESCRIPTION
The
.Fn posix_fallocate
call allocates backing store for the file referenced by
.Fa fd
in the region starting at
.Fa pos
bytes from the start of the file and continuing for
.Fa length
bytes more.
If the region extends past the current end of file, the file size is
increased to cover the region.
.Pp
The
.Fn fdiscard
call discards backing store for the file referenced by
.Fa fd
in the region starting at
.Fa pos
bytes from the start of the file and continuing for
.Fa length
bytes more.
The file size is not affected.
.Pp
Both calls operate on the basis of file system blocks, so
.Fn posix_fallocate
may allocate more physical space than requested and
.Fn fdiscard
may discard less physical space than requested.
.Pp
When
.Fn posix_fallocate
is applied to an unallocated region in a regular file (a
.Dq hole ),
the hole is filled and the visible contents are unaffected; both holes
and newly allocated regions read as all zeros.
If
.Fn posix_fallocate
is applied to an already-allocated region in a regular file,
it has no effect.
.Pp
When
.Fn fdiscard
is applied to a regular file, a hole is created and any data in the
affected region is thrown away.
Subsequent reads of the region return zeros.
.Pp
If
.Fn fdiscard
is applied to a device, and the device supports an underlying discard
operation, that operation is invoked.
For example, ATA flash devices and solid-state disks support an
operation called TRIM that discards blocks at the device level.
The behavior of blocks discarded at this level is
implementation-defined; as devices vary, specific behavior should not
be relied upon.
Subsequent reads of the same block may return zeros; such reads may
also, however, continue to return the previously written data, or
return other data, or return indeterminate garbage; or may switch
between any of these behaviors at unpredictable points later on.
.Pp
For both calls, the file
.Fa fd
must be open for writing and may not be a directory or socket.
.Sh RESTRICTIONS
Because there is no way for
.Fn posix_fallocate
to report a partial failure, errors may require some or all of the
work it has already done to be unwound, which may be expensive.
It is recommended to set the file length first with
.Xr ftruncate 2
and only then allocate space within the file using
.Fn posix_fallocate .
.Pp
Depending on the implementation, even a failing call to
.Fn posix_fallocate
may allocate some space to the target file.
Such a call will not, however, change the file size.
.Pp
Furthermore, in some implementations, the space reservations created
by
.Fn posix_fallocate
may not be persistent after a crash or reboot if the space reserved
has not yet been written to.
.Sh RETURN VALUES
On success these calls return 0.
On error, \-1 is returned, and the global variable
.Va errno
is set to indicate the error.
.Sh ERRORS
.Bl -tag -width Er
.It Bq Er EBADF
The file descriptor
.Fa fd
is invalid or not open for writing.
.It Bq Er EINVAL
The position and/or length values are negative.
.It Bq Er EISDIR
The selected file is a directory.
.It Bq Er EIO
A hardware-level I/O error occurred.
.It Bq Er ENOSPC
There was no space in the file system to complete the operation.
.It Bq Er EDQUOT
Allocating the requested blocks would exceed the user's quota.
.El
.Sh SEE ALSO
.Xr ftruncate 2
.Sh HISTORY
The
.Fn posix_fallocate
and
.Fn fdiscard
function calls appeared in
.Nx 7.0 .
Similar functions appeared previously in Linux.
The
.Fn posix_fallocate
function is expected to conform to
.St -p1003.1-2004 .

View File

@ -1,4 +1,4 @@
.\" $NetBSD: truncate.2,v 1.28 2014/06/14 19:02:29 wiz Exp $
.\" $NetBSD: truncate.2,v 1.29 2014/07/25 08:30:47 dholland Exp $
.\"
.\" Copyright (c) 1983, 1991, 1993
.\" The Regents of the University of California. All rights reserved.
@ -29,7 +29,7 @@
.\"
.\" @(#)truncate.2 8.1 (Berkeley) 6/4/93
.\"
.Dd June 14, 2014
.Dd July 23, 2014
.Dt TRUNCATE 2
.Os
.Sh NAME
@ -133,7 +133,8 @@ the
is not open for writing.
.El
.Sh SEE ALSO
.Xr open 2
.Xr fdiscard 2 ,
.Xr open 2
.Sh STANDARDS
Use of
.Fn truncate
@ -151,6 +152,3 @@ and
.Fn ftruncate
function calls appeared in
.Bx 4.2 .
.Sh BUGS
These calls should be generalized to allow ranges
of bytes in a file to be discarded.