Provide pg_preadv() and pg_pwritev().
Provide synchronous vectored file I/O routines. These map to preadv() and pwritev(), with fallback implementations for systems that don't have them. Also provide a wrapper pg_pwritev_with_retry() that automatically retries on short writes. Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/CA%2BhUKGJA%2Bu-220VONeoREBXJ9P3S94Y7J%2BkqCnTYmahvZJwM%3Dg%40mail.gmail.com
This commit is contained in:
parent
01334c92fa
commit
13a021f3e8
30
configure
vendored
30
configure
vendored
@ -13061,7 +13061,7 @@ $as_echo "#define HAVE_STDBOOL_H 1" >>confdefs.h
|
||||
fi
|
||||
|
||||
|
||||
for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/un.h termios.h ucred.h wctype.h
|
||||
for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h langinfo.h mbarrier.h poll.h sys/epoll.h sys/event.h sys/ipc.h sys/prctl.h sys/procctl.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/sockio.h sys/tas.h sys/uio.h sys/un.h termios.h ucred.h wctype.h
|
||||
do :
|
||||
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
|
||||
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
|
||||
@ -15155,7 +15155,7 @@ fi
|
||||
LIBS_including_readline="$LIBS"
|
||||
LIBS=`echo "$LIBS" | sed -e 's/-ledit//g' -e 's/-lreadline//g'`
|
||||
|
||||
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pstat pthread_is_threaded_np readlink setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l
|
||||
for ac_func in backtrace_symbols clock_gettime copyfile fdatasync getifaddrs getpeerucred getrlimit kqueue mbstowcs_l memset_s poll posix_fallocate ppoll pread preadv pstat pthread_is_threaded_np pwrite pwritev readlink readv setproctitle setproctitle_fast setsid shm_open strchrnul strsignal symlink sync_file_range uselocale wcstombs_l writev
|
||||
do :
|
||||
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
||||
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
|
||||
@ -15832,32 +15832,6 @@ esac
|
||||
|
||||
fi
|
||||
|
||||
ac_fn_c_check_func "$LINENO" "pread" "ac_cv_func_pread"
|
||||
if test "x$ac_cv_func_pread" = xyes; then :
|
||||
$as_echo "#define HAVE_PREAD 1" >>confdefs.h
|
||||
|
||||
else
|
||||
case " $LIBOBJS " in
|
||||
*" pread.$ac_objext "* ) ;;
|
||||
*) LIBOBJS="$LIBOBJS pread.$ac_objext"
|
||||
;;
|
||||
esac
|
||||
|
||||
fi
|
||||
|
||||
ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite"
|
||||
if test "x$ac_cv_func_pwrite" = xyes; then :
|
||||
$as_echo "#define HAVE_PWRITE 1" >>confdefs.h
|
||||
|
||||
else
|
||||
case " $LIBOBJS " in
|
||||
*" pwrite.$ac_objext "* ) ;;
|
||||
*) LIBOBJS="$LIBOBJS pwrite.$ac_objext"
|
||||
;;
|
||||
esac
|
||||
|
||||
fi
|
||||
|
||||
ac_fn_c_check_func "$LINENO" "random" "ac_cv_func_random"
|
||||
if test "x$ac_cv_func_random" = xyes; then :
|
||||
$as_echo "#define HAVE_RANDOM 1" >>confdefs.h
|
||||
|
@ -1331,6 +1331,7 @@ AC_CHECK_HEADERS(m4_normalize([
|
||||
sys/shm.h
|
||||
sys/sockio.h
|
||||
sys/tas.h
|
||||
sys/uio.h
|
||||
sys/un.h
|
||||
termios.h
|
||||
ucred.h
|
||||
@ -1660,9 +1661,14 @@ AC_CHECK_FUNCS(m4_normalize([
|
||||
poll
|
||||
posix_fallocate
|
||||
ppoll
|
||||
pread
|
||||
preadv
|
||||
pstat
|
||||
pthread_is_threaded_np
|
||||
pwrite
|
||||
pwritev
|
||||
readlink
|
||||
readv
|
||||
setproctitle
|
||||
setproctitle_fast
|
||||
setsid
|
||||
@ -1673,6 +1679,7 @@ AC_CHECK_FUNCS(m4_normalize([
|
||||
sync_file_range
|
||||
uselocale
|
||||
wcstombs_l
|
||||
writev
|
||||
]))
|
||||
|
||||
# These typically are compiler builtins, for which AC_CHECK_FUNCS fails.
|
||||
@ -1733,8 +1740,6 @@ AC_REPLACE_FUNCS(m4_normalize([
|
||||
inet_aton
|
||||
link
|
||||
mkdtemp
|
||||
pread
|
||||
pwrite
|
||||
random
|
||||
srandom
|
||||
strlcat
|
||||
|
@ -412,6 +412,9 @@
|
||||
/* Define to 1 if you have the `pread' function. */
|
||||
#undef HAVE_PREAD
|
||||
|
||||
/* Define to 1 if you have the `preadv' function. */
|
||||
#undef HAVE_PREADV
|
||||
|
||||
/* Define to 1 if you have the `pstat' function. */
|
||||
#undef HAVE_PSTAT
|
||||
|
||||
@ -430,6 +433,9 @@
|
||||
/* Define to 1 if you have the `pwrite' function. */
|
||||
#undef HAVE_PWRITE
|
||||
|
||||
/* Define to 1 if you have the `pwritev' function. */
|
||||
#undef HAVE_PWRITEV
|
||||
|
||||
/* Define to 1 if you have the `random' function. */
|
||||
#undef HAVE_RANDOM
|
||||
|
||||
@ -445,6 +451,9 @@
|
||||
/* Define to 1 if you have the `readlink' function. */
|
||||
#undef HAVE_READLINK
|
||||
|
||||
/* Define to 1 if you have the `readv' function. */
|
||||
#undef HAVE_READV
|
||||
|
||||
/* Define to 1 if you have the global variable
|
||||
'rl_completion_append_character'. */
|
||||
#undef HAVE_RL_COMPLETION_APPEND_CHARACTER
|
||||
@ -629,6 +638,9 @@
|
||||
/* Define to 1 if you have the <sys/ucred.h> header file. */
|
||||
#undef HAVE_SYS_UCRED_H
|
||||
|
||||
/* Define to 1 if you have the <sys/uio.h> header file. */
|
||||
#undef HAVE_SYS_UIO_H
|
||||
|
||||
/* Define to 1 if you have the <sys/un.h> header file. */
|
||||
#undef HAVE_SYS_UN_H
|
||||
|
||||
@ -683,6 +695,9 @@
|
||||
/* Define to 1 if you have the <winldap.h> header file. */
|
||||
#undef HAVE_WINLDAP_H
|
||||
|
||||
/* Define to 1 if you have the `writev' function. */
|
||||
#undef HAVE_WRITEV
|
||||
|
||||
/* Define to 1 if you have the `X509_get_signature_nid' function. */
|
||||
#undef HAVE_X509_GET_SIGNATURE_NID
|
||||
|
||||
|
@ -431,6 +431,8 @@ extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset);
|
||||
extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset);
|
||||
#endif
|
||||
|
||||
/* For pg_pwritev() and pg_preadv(), see port/pg_iovec.h. */
|
||||
|
||||
#if !HAVE_DECL_STRLCAT
|
||||
extern size_t strlcat(char *dst, const char *src, size_t siz);
|
||||
#endif
|
||||
|
59
src/include/port/pg_iovec.h
Normal file
59
src/include/port/pg_iovec.h
Normal file
@ -0,0 +1,59 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pg_iovec.h
|
||||
* Header for the vectored I/O functions in src/port/p{read,write}.c.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* src/include/port/pg_iovec.h
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#ifndef PG_IOVEC_H
|
||||
#define PG_IOVEC_H
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
|
||||
/* If <sys/uio.h> is missing, define our own POSIX-compatible iovec struct. */
|
||||
#ifndef HAVE_SYS_UIO_H
|
||||
/* Describes one buffer of a scatter/gather I/O request. */
struct iovec
|
||||
{
|
||||
void *iov_base;	/* start of the buffer; passed as the data pointer to pg_pread()/pg_pwrite() */
|
||||
size_t iov_len;	/* size of the buffer in bytes */
|
||||
};
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If <limits.h> didn't define IOV_MAX, define our own. POSIX requires at
|
||||
* least 16.
|
||||
*/
|
||||
#ifndef IOV_MAX
|
||||
#define IOV_MAX 16
|
||||
#endif
|
||||
|
||||
/* Define a reasonable maximum that is safe to use on the stack. */
|
||||
#define PG_IOV_MAX Min(IOV_MAX, 32)
|
||||
|
||||
#ifdef HAVE_PREADV
|
||||
#define pg_preadv preadv
|
||||
#else
|
||||
extern ssize_t pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_PWRITEV
|
||||
#define pg_pwritev pwritev
|
||||
#else
|
||||
extern ssize_t pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
|
||||
#endif
|
||||
|
||||
extern ssize_t pg_pwritev_with_retry(int fd,
|
||||
const struct iovec *iov,
|
||||
int iovcnt,
|
||||
off_t offset);
|
||||
|
||||
#endif /* PG_IOVEC_H */
|
@ -53,6 +53,8 @@ OBJS = \
|
||||
pgstrcasecmp.o \
|
||||
pgstrsignal.o \
|
||||
pqsignal.o \
|
||||
pread.o \
|
||||
pwrite.o \
|
||||
qsort.o \
|
||||
qsort_arg.o \
|
||||
quotes.o \
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pread.c
|
||||
* Implementation of pread(2) for platforms that lack one.
|
||||
* Implementation of pread[v](2) for platforms that lack one.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
||||
*
|
||||
@ -9,7 +9,8 @@
|
||||
* src/port/pread.c
|
||||
*
|
||||
* Note that this implementation changes the current file position, unlike
|
||||
* the POSIX function, so we use the name pg_pread().
|
||||
* the POSIX function, so we use the name pg_pread(). Likewise for the
|
||||
* iovec version.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -23,6 +24,9 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_iovec.h"
|
||||
|
||||
#ifndef HAVE_PREAD
|
||||
ssize_t
|
||||
pg_pread(int fd, void *buf, size_t size, off_t offset)
|
||||
{
|
||||
@ -56,3 +60,38 @@ pg_pread(int fd, void *buf, size_t size, off_t offset)
|
||||
return read(fd, buf, size);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_PREADV
|
||||
/*
 * pg_preadv
 *
 * Replacement for preadv(): read from 'fd', starting at 'offset', into the
 * 'iovcnt' buffers described by 'iov'.
 *
 * Returns the number of bytes read, which may be less than the combined
 * buffer size, or -1 if nothing could be read because of an error.
 *
 * When readv() is available the descriptor is positioned with lseek() first,
 * so the file position is changed.  Otherwise the buffers are filled one at
 * a time with pg_pread().
 */
ssize_t
pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_READV
	/* A single buffer needs no vectored call at all. */
	if (iovcnt == 1)
		return pg_pread(fd, iov[0].iov_base, iov[0].iov_len, offset);

	/* Position the descriptor, then let readv() scatter the data. */
	if (lseek(fd, offset, SEEK_SET) < 0)
		return -1;
	return readv(fd, iov, iovcnt);
#else
	ssize_t		total = 0;
	int			idx;

	for (idx = 0; idx < iovcnt; idx++)
	{
		const struct iovec *cur = &iov[idx];
		ssize_t		nread = pg_pread(fd, cur->iov_base, cur->iov_len, offset);

		/*
		 * An error on the first buffer is reported as such; a later error is
		 * masked by returning the bytes already transferred.
		 */
		if (nread < 0)
			return (idx == 0) ? -1 : total;

		total += nread;
		offset += nread;

		/* Short read: stop here rather than leave a gap in the buffers. */
		if ((size_t) nread < cur->iov_len)
			break;
	}

	return total;
#endif
}
|
||||
#endif
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* pwrite.c
|
||||
* Implementation of pwrite(2) for platforms that lack one.
|
||||
* Implementation of pwrite[v](2) for platforms that lack one.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
|
||||
*
|
||||
@ -9,7 +9,8 @@
|
||||
* src/port/pwrite.c
|
||||
*
|
||||
* Note that this implementation changes the current file position, unlike
|
||||
* the POSIX function, so we use the name pg_pwrite().
|
||||
* the POSIX function, so we use the name pg_pwrite(). Likewise for the
|
||||
* iovec version.
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -23,6 +24,9 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "port/pg_iovec.h"
|
||||
|
||||
#ifndef HAVE_PWRITE
|
||||
ssize_t
|
||||
pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
|
||||
{
|
||||
@ -53,3 +57,102 @@ pg_pwrite(int fd, const void *buf, size_t size, off_t offset)
|
||||
return write(fd, buf, size);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_PWRITEV
|
||||
/*
 * pg_pwritev
 *
 * Replacement for pwritev(): write the 'iovcnt' buffers described by 'iov'
 * to 'fd', starting at 'offset'.
 *
 * Returns the number of bytes written, which may be less than the combined
 * buffer size, or -1 if nothing could be written because of an error.
 *
 * When writev() is available the descriptor is positioned with lseek()
 * first, so the file position is changed.  Otherwise the buffers are written
 * one at a time with pg_pwrite().
 */
ssize_t
pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
#ifdef HAVE_WRITEV
	/* A single buffer needs no vectored call at all. */
	if (iovcnt == 1)
		return pg_pwrite(fd, iov[0].iov_base, iov[0].iov_len, offset);

	/* Position the descriptor, then let writev() gather the data. */
	if (lseek(fd, offset, SEEK_SET) < 0)
		return -1;
	return writev(fd, iov, iovcnt);
#else
	ssize_t		total = 0;
	int			idx;

	for (idx = 0; idx < iovcnt; idx++)
	{
		const struct iovec *cur = &iov[idx];
		ssize_t		nwritten = pg_pwrite(fd, cur->iov_base, cur->iov_len, offset);

		/*
		 * An error on the first buffer is reported as such; a later error is
		 * masked by returning the bytes already transferred.
		 */
		if (nwritten < 0)
			return (idx == 0) ? -1 : total;

		total += nwritten;
		offset += nwritten;

		/* Short write: stop here so the caller can decide how to continue. */
		if ((size_t) nwritten < cur->iov_len)
			break;
	}

	return total;
#endif
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* A convenience wrapper for pg_pwritev() that retries on partial write. If an
|
||||
* error is returned, it is unspecified how much has been written.
|
||||
*/
|
||||
ssize_t
|
||||
pg_pwritev_with_retry(int fd, const struct iovec *iov, int iovcnt, off_t offset)
|
||||
{
|
||||
struct iovec iov_copy[PG_IOV_MAX];
|
||||
ssize_t sum = 0;
|
||||
ssize_t part;
|
||||
|
||||
/* We'd better have space to make a copy, in case we need to retry. */
|
||||
if (iovcnt > PG_IOV_MAX)
|
||||
{
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* Write as much as we can. */
|
||||
part = pg_pwritev(fd, iov, iovcnt, offset);
|
||||
if (part < 0)
|
||||
return -1;
|
||||
|
||||
#ifdef SIMULATE_SHORT_WRITE
|
||||
part = Min(part, 4096);
|
||||
#endif
|
||||
|
||||
/* Count our progress. */
|
||||
sum += part;
|
||||
offset += part;
|
||||
|
||||
/* Step over iovecs that are done. */
|
||||
while (iovcnt > 0 && iov->iov_len <= part)
|
||||
{
|
||||
part -= iov->iov_len;
|
||||
++iov;
|
||||
--iovcnt;
|
||||
}
|
||||
|
||||
/* Are they all done? */
|
||||
if (iovcnt == 0)
|
||||
{
|
||||
if (part > 0)
|
||||
elog(ERROR, "unexpectedly wrote more than requested");
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move whatever's left to the front of our mutable copy and adjust the
|
||||
* leading iovec.
|
||||
*/
|
||||
Assert(iovcnt > 0);
|
||||
memmove(iov_copy, iov, sizeof(*iov) * iovcnt);
|
||||
Assert(iov->iov_len > part);
|
||||
iov_copy[0].iov_base = (char *) iov_copy[0].iov_base + part;
|
||||
iov_copy[0].iov_len -= part;
|
||||
iov = iov_copy;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
@ -329,17 +329,20 @@ sub GenerateFiles
|
||||
HAVE_PPC_LWARX_MUTEX_HINT => undef,
|
||||
HAVE_PPOLL => undef,
|
||||
HAVE_PREAD => undef,
|
||||
HAVE_PREADV => undef,
|
||||
HAVE_PSTAT => undef,
|
||||
HAVE_PS_STRINGS => undef,
|
||||
HAVE_PTHREAD => undef,
|
||||
HAVE_PTHREAD_IS_THREADED_NP => undef,
|
||||
HAVE_PTHREAD_PRIO_INHERIT => undef,
|
||||
HAVE_PWRITE => undef,
|
||||
HAVE_PWRITEV => undef,
|
||||
HAVE_RANDOM => undef,
|
||||
HAVE_READLINE_H => undef,
|
||||
HAVE_READLINE_HISTORY_H => undef,
|
||||
HAVE_READLINE_READLINE_H => undef,
|
||||
HAVE_READLINK => undef,
|
||||
HAVE_READV => undef,
|
||||
HAVE_RL_COMPLETION_APPEND_CHARACTER => undef,
|
||||
HAVE_RL_COMPLETION_MATCHES => undef,
|
||||
HAVE_RL_COMPLETION_SUPPRESS_QUOTE => undef,
|
||||
@ -400,6 +403,7 @@ sub GenerateFiles
|
||||
HAVE_SYS_TAS_H => undef,
|
||||
HAVE_SYS_TYPES_H => 1,
|
||||
HAVE_SYS_UCRED_H => undef,
|
||||
HAVE_SYS_UIO_H => undef,
|
||||
HAVE_SYS_UN_H => undef,
|
||||
HAVE_TERMIOS_H => undef,
|
||||
HAVE_TYPEOF => undef,
|
||||
@ -418,6 +422,7 @@ sub GenerateFiles
|
||||
HAVE_WINLDAP_H => undef,
|
||||
HAVE_WCSTOMBS_L => 1,
|
||||
HAVE_WCTYPE_H => 1,
|
||||
HAVE_WRITEV => undef,
|
||||
HAVE_X509_GET_SIGNATURE_NID => 1,
|
||||
HAVE_X86_64_POPCNTQ => undef,
|
||||
HAVE__BOOL => undef,
|
||||
|
Loading…
x
Reference in New Issue
Block a user