From 5370070fded61b569196764673a4fc8440aac79e Mon Sep 17 00:00:00 2001
From: Rich Felker
Date: Wed, 13 Mar 2024 10:47:42 -0400
Subject: [PATCH] fix pwrite/pwritev handling of O_APPEND files

POSIX requires pwrite to honor the explicit file offset where the
write should take place even if the file was opened as O_APPEND.
however, linux historically defined the pwrite syscall family as
honoring O_APPEND. this cannot be changed on the kernel side due to
stability policy, but the addition of the pwritev2 syscall with a
flags argument opened the door to fixing it, and linux commit
73fa7547c70b32cc69685f79be31135797734eb6 adds the RWF_NOAPPEND flag
that lets us request a write honoring the file offset argument.

this patch changes the pwrite function to first attempt using the
pwritev2 syscall with RWF_NOAPPEND, falling back to using the old
pwrite syscall only after checking that O_APPEND is not set for the
open file. if O_APPEND is set, the operation fails with EOPNOTSUPP,
reflecting that the kernel does not support the correct behavior. this
is an extended error case needed to avoid the wrong behavior that
happened before (writing the data at the wrong location), and is
aligned with the spirit of the POSIX requirement that "An attempt to
perform a pwrite() on a file that is incapable of seeking shall result
in an error."

since the pwritev2 syscall interprets the offset of -1 as a request to
write at the current file offset, it is mapped to a different negative
value that will produce the expected error.

pwritev, though not governed by POSIX at this time, is adjusted to
match pwrite in honoring the offset.
---
 src/unistd/pwrite.c  | 11 +++++++++++
 src/unistd/pwritev.c | 10 +++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/unistd/pwrite.c b/src/unistd/pwrite.c
index 869b69f0..a008b3ec 100644
--- a/src/unistd/pwrite.c
+++ b/src/unistd/pwrite.c
@@ -1,7 +1,18 @@
+#define _GNU_SOURCE
 #include <unistd.h>
+#include <sys/uio.h>
+#include <fcntl.h>
 #include "syscall.h"
 
 ssize_t pwrite(int fd, const void *buf, size_t size, off_t ofs)
 {
+	if (ofs == -1) ofs--;
+	int r = __syscall_cp(SYS_pwritev2, fd,
+		(&(struct iovec){ .iov_base = (void *)buf, .iov_len = size }),
+		1, (long)(ofs), (long)(ofs>>32), RWF_NOAPPEND);
+	if (r != -EOPNOTSUPP && r != -ENOSYS)
+		return __syscall_ret(r);
+	if (fcntl(fd, F_GETFL) & O_APPEND)
+		return __syscall_ret(-EOPNOTSUPP);
 	return syscall_cp(SYS_pwrite, fd, buf, size, __SYSCALL_LL_PRW(ofs));
 }
diff --git a/src/unistd/pwritev.c b/src/unistd/pwritev.c
index becf9deb..44a53d85 100644
--- a/src/unistd/pwritev.c
+++ b/src/unistd/pwritev.c
@@ -1,10 +1,18 @@
-#define _BSD_SOURCE
+#define _GNU_SOURCE
 #include <sys/uio.h>
 #include <unistd.h>
+#include <fcntl.h>
 #include "syscall.h"
 
 ssize_t pwritev(int fd, const struct iovec *iov, int count, off_t ofs)
 {
+	if (ofs == -1) ofs--;
+	int r = __syscall_cp(SYS_pwritev2, fd, iov, count,
+		(long)(ofs), (long)(ofs>>32), RWF_NOAPPEND);
+	if (r != -EOPNOTSUPP && r != -ENOSYS)
+		return __syscall_ret(r);
+	if (fcntl(fd, F_GETFL) & O_APPEND)
+		return __syscall_ret(-EOPNOTSUPP);
 	return syscall_cp(SYS_pwritev, fd, iov, count,
 		(long)(ofs), (long)(ofs>>32));
 }