Add memfd_create(2) from GSoC 2023 by Theodore Preduta
This commit is contained in:
parent
8d0cdbc342
commit
7eace3da0c
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: mi,v 1.2436 2023/07/08 17:43:13 christos Exp $
|
||||
# $NetBSD: mi,v 1.2437 2023/07/10 02:31:54 christos Exp $
|
||||
#
|
||||
# Note: don't delete entries from here - mark them as "obsolete" instead.
|
||||
./etc/mtree/set.comp comp-sys-root
|
||||
|
@ -4826,6 +4826,7 @@
|
|||
./usr/share/man/cat2/lutimes.0 comp-c-catman .cat
|
||||
./usr/share/man/cat2/m68k_sync_icache.0 comp-c-catman .cat
|
||||
./usr/share/man/cat2/madvise.0 comp-c-catman .cat
|
||||
./usr/share/man/cat2/memfd_create.0 comp-c-catman .cat
|
||||
./usr/share/man/cat2/mincore.0 comp-c-catman .cat
|
||||
./usr/share/man/cat2/minherit.0 comp-c-catman .cat
|
||||
./usr/share/man/cat2/mkdir.0 comp-c-catman .cat
|
||||
|
@ -13315,6 +13316,7 @@
|
|||
./usr/share/man/html2/lutimes.html comp-c-htmlman html
|
||||
./usr/share/man/html2/m68k_sync_icache.html comp-c-htmlman html
|
||||
./usr/share/man/html2/madvise.html comp-c-htmlman html
|
||||
./usr/share/man/html2/memfd_create.html comp-c-htmlman html
|
||||
./usr/share/man/html2/mincore.html comp-c-htmlman html
|
||||
./usr/share/man/html2/minherit.html comp-c-htmlman html
|
||||
./usr/share/man/html2/mkdir.html comp-c-htmlman html
|
||||
|
@ -21632,6 +21634,7 @@
|
|||
./usr/share/man/man2/lutimes.2 comp-c-man .man
|
||||
./usr/share/man/man2/m68k_sync_icache.2 comp-c-man .man
|
||||
./usr/share/man/man2/madvise.2 comp-c-man .man
|
||||
./usr/share/man/man2/memfd_create.2 comp-c-man .man
|
||||
./usr/share/man/man2/mincore.2 comp-c-man .man
|
||||
./usr/share/man/man2/minherit.2 comp-c-man .man
|
||||
./usr/share/man/man2/mkdir.2 comp-c-man .man
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# $NetBSD: Makefile.inc,v 1.250 2021/11/01 05:53:45 thorpej Exp $
|
||||
# $NetBSD: Makefile.inc,v 1.251 2023/07/10 02:31:54 christos Exp $
|
||||
# @(#)Makefile.inc 8.3 (Berkeley) 10/24/94
|
||||
|
||||
# sys sources
|
||||
|
@ -134,7 +134,8 @@ ASM=\
|
|||
_lwp_unpark_all.S _lwp_suspend.S _lwp_continue.S \
|
||||
_lwp_wakeup.S _lwp_detach.S _lwp_setprivate.S \
|
||||
_lwp_setname.S _lwp_getname.S _lwp_ctl.S \
|
||||
madvise.S mincore.S minherit.S mkdir.S mkdirat.S mkfifo.S mkfifoat.S \
|
||||
madvise.S memfd_create.S mincore.S minherit.S mkdir.S mkdirat.S \
|
||||
mkfifo.S mkfifoat.S \
|
||||
__mknod50.S mlock.S mlockall.S modctl.S __mount50.S \
|
||||
mprotect.S __msgctl50.S msgget.S munlock.S munlockall.S \
|
||||
munmap.S \
|
||||
|
@ -275,7 +276,7 @@ MAN+= accept.2 access.2 acct.2 adjtime.2 bind.2 brk.2 chdir.2 \
|
|||
_lwp_suspend.2 _lwp_wakeup.2 _lwp_wait.2 _lwp_kill.2 \
|
||||
_lwp_getname.2 _lwp_getprivate.2 \
|
||||
_lwp_park.2 _lwp_unpark.2 _lwp_unpark_all.2 \
|
||||
mkdir.2 mkfifo.2 mknod.2 \
|
||||
memfd_create.2 mkdir.2 mkfifo.2 mknod.2 \
|
||||
madvise.2 mincore.2 minherit.2 mlock.2 mlockall.2 mmap.2 modctl.2 \
|
||||
mount.2 \
|
||||
mprotect.2 mremap.2 msgctl.2 msgget.2 msgrcv.2 msgsnd.2 msync.2 \
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
.\" $NetBSD: fcntl.2,v 1.49 2022/12/04 19:01:19 uwe Exp $
|
||||
.\" $NetBSD: fcntl.2,v 1.50 2023/07/10 02:31:54 christos Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 1983, 1993
|
||||
.\" The Regents of the University of California. All rights reserved.
|
||||
|
@ -29,7 +29,7 @@
|
|||
.\"
|
||||
.\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94
|
||||
.\"
|
||||
.Dd September 26, 2019
|
||||
.Dd July 5, 2023
|
||||
.Dt FCNTL 2
|
||||
.Os
|
||||
.Sh NAME
|
||||
|
@ -162,6 +162,24 @@ in the buffer pointed to by
|
|||
.Fa arg
|
||||
should be pointing to a buffer of at least
|
||||
.Dv MAXPATHLEN .
|
||||
.It Dv F_ADD_SEALS
|
||||
Add seals specified in
|
||||
.Fa arg
|
||||
to
|
||||
.Fa fd
|
||||
to restrict possible operations on
|
||||
.Fa fd
|
||||
as described below.
|
||||
Like flags, multiple seals can be specified at once.
|
||||
Additionally, specifying seals that are already associated with
|
||||
.Fa fd
|
||||
is a no-op.
|
||||
.It Dv F_GET_SEALS
|
||||
Get the seals currently associated with
|
||||
.Fa fd
|
||||
as described below
|
||||
.Fa ( arg
|
||||
is ignored).
|
||||
.El
|
||||
.Pp
|
||||
The set of valid flags for the
|
||||
|
@ -324,13 +342,44 @@ or an
|
|||
request fails or blocks respectively when another process has existing
|
||||
locks on bytes in the specified region and the type of any of those
|
||||
locks conflicts with the type specified in the request.
|
||||
.Pp
|
||||
Possible seals are:
|
||||
.Bl -tag -width F_SEAL_FUTURE_WRITE
|
||||
.It Dv F_SEAL_SEAL
|
||||
Prevent any further seals from being added to
|
||||
.Fa fd .
|
||||
.It Dv F_SEAL_SHRINK
|
||||
Prevent the size of
|
||||
.Fa fd
|
||||
from decreasing.
|
||||
.It Dv F_SEAL_GROW
|
||||
Prevent the size of
|
||||
.Fa fd
|
||||
from increasing.
|
||||
.It Dv F_SEAL_WRITE
|
||||
Prevent any write operations to
|
||||
.Fa fd .
|
||||
.Dv F_SEAL_WRITE
|
||||
cannot be applied if
|
||||
.Fa fd
|
||||
has any memory mappings.
|
||||
.It Dv F_SEAL_FUTURE_WRITE
|
||||
Like
|
||||
.Dv F_SEAL_WRITE
|
||||
but allow any current memory mappings of
|
||||
.Fa fd
|
||||
to remain open, including those with
|
||||
.Dv PROT_WRITE .
|
||||
.El
|
||||
.Sh NOTES
|
||||
The
|
||||
.Dv F_GETPATH
|
||||
functionality is implemented using the reverse
|
||||
For
|
||||
.Dv F_GETPATH :
|
||||
.Bl -bullet -compact
|
||||
.It
|
||||
For vnodes, functionality is implemented using the reverse
|
||||
.Xr namei 9
|
||||
cache.
|
||||
The implications of this are:
|
||||
The implications of this are
|
||||
.Bl -bullet -compact
|
||||
.It
|
||||
For hard links where the file descriptor can resolve to multiple pathnames,
|
||||
|
@ -341,16 +390,25 @@ may fail if the corresponding entry has been evicted from the LRU
|
|||
.Xr namei 9
|
||||
cache and return
|
||||
.Er ENOENT .
|
||||
.El
|
||||
.It
|
||||
File descriptors that don't point to vnodes are not handled, as
|
||||
well as symbolic links since there is currently no way to obtain
|
||||
a file descriptor pointing to a symbolic link.
|
||||
For a file descriptor created by
|
||||
.Xr memfd_create 2 ,
|
||||
the name provided at
|
||||
.Fa fd
|
||||
creation, with the prefix
|
||||
.Dq memfd:
|
||||
is used.
|
||||
.It
|
||||
Other types of file descriptors are not handled, as well as symbolic
|
||||
links since there is currently no way to obtain a file descriptor
|
||||
pointing to a symbolic link.
|
||||
.El
|
||||
.Sh RETURN VALUES
|
||||
Upon successful completion, the value returned depends on
|
||||
.Fa cmd
|
||||
as follows:
|
||||
.Bl -tag -width F_GETOWNX -offset indent
|
||||
.Bl -tag -width F_GET_SEALS -offset indent
|
||||
.It Dv F_DUPFD
|
||||
A new file descriptor.
|
||||
.It Dv F_GETFD
|
||||
|
@ -361,6 +419,9 @@ Value of flags.
|
|||
Value of file descriptor owner.
|
||||
.It Dv F_MAXFD
|
||||
Value of the highest file descriptor open by the process.
|
||||
.It Dv F_GET_SEALS
|
||||
Value of the seals currently associated with
|
||||
.Fa fd .
|
||||
.It other
|
||||
Value other than \-1.
|
||||
.El
|
||||
|
@ -473,6 +534,18 @@ is an exclusive lock
|
|||
and
|
||||
.Fa fildes
|
||||
is not a valid file descriptor open for writing.
|
||||
.It Bq Er EBUSY
|
||||
The argument
|
||||
.Fa cmd
|
||||
is
|
||||
.Dv F_ADD_SEALS ,
|
||||
.Fa arg
|
||||
contains
|
||||
.Dv F_SEAL_WRITE
|
||||
and
|
||||
.Fa fd
|
||||
is currently mapped by
|
||||
.Xr mmap 2 .
|
||||
.It Bq Er EDEADLK
|
||||
The argument
|
||||
.Fa cmd
|
||||
|
@ -512,6 +585,24 @@ and the data to which
|
|||
points is not valid, or
|
||||
.Fa fildes
|
||||
refers to a file that does not support locking.
|
||||
.Pp
|
||||
The argument
|
||||
.Fa cmd
|
||||
is
|
||||
.Dv F_ADD_SEALS
|
||||
or
|
||||
.Dv F_GET_SEALS
|
||||
and
|
||||
.Fa fd
|
||||
does not support seals.
|
||||
.Pp
|
||||
The argument
|
||||
.Fa cmd
|
||||
is
|
||||
.Dv F_ADD_SEALS
|
||||
and
|
||||
.Fa arg
|
||||
contains set bits for unsupported seals.
|
||||
.It Bq Er EMFILE
|
||||
The argument
|
||||
.Fa cmd
|
||||
|
@ -562,6 +653,15 @@ has been reached.
|
|||
It can be modified using the
|
||||
.Li kern.maxfiles
|
||||
.Xr sysctl 7 .
|
||||
.It Bq Er EPERM
|
||||
The argument
|
||||
.Fa cmd
|
||||
is
|
||||
.Dv F_ADD_SEALS
|
||||
and
|
||||
.Fa fd
|
||||
already has
|
||||
.Dv F_SEAL_SEAL .
|
||||
.It Bq Er ERANGE
|
||||
The argument
|
||||
.Fa cmd
|
||||
|
|
|
@ -0,0 +1,125 @@
|
|||
.\" $NetBSD: memfd_create.2,v 1.1 2023/07/10 02:31:54 christos Exp $
|
||||
.\"
|
||||
.\" Copyright (c) 2023 The NetBSD Foundation, Inc.
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" This code is derived from software contributed to The NetBSD Foundation
|
||||
.\" by Theodore Preduta.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
.\" POSSIBILITY OF SUCH DAMAGE.
|
||||
.\"
|
||||
.Dd July 5, 2023
|
||||
.Dt MEMFD_CREATE 2
|
||||
.Os
|
||||
.Sh NAME
|
||||
.Nm memfd_create
|
||||
.Nd create anonymous files
|
||||
.Sh LIBRARY
|
||||
.Lb libc
|
||||
.Sh SYNOPSIS
|
||||
.In sys/mman.h
|
||||
.Ft int
|
||||
.Fn memfd_create "const char *name" "unsigned int flags"
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Fn memfd_create
|
||||
system call returns a file descriptor to a file named
|
||||
.Fa name
|
||||
backed only by RAM.
|
||||
Initially, the size of the file is zero.
|
||||
.Pp
|
||||
The length of
|
||||
.Fa name
|
||||
must not exceed
|
||||
.Dv NAME_MAX-6
|
||||
characters, to allow for the prefix
|
||||
.Dq memfd:
|
||||
to be added.
|
||||
Because the file is not stored on disk,
|
||||
.Fa name
|
||||
does not have to be unique.
|
||||
.Fa name
|
||||
is only intended to be used for debugging purposes and commands like
|
||||
.Xr fstat 1 .
|
||||
.Pp
|
||||
Additionally, any of the following may be specified as the
|
||||
.Fa flags :
|
||||
.Bl -tag -width MFD_ALLOW_SEALING
|
||||
.It Dv MFD_CLOEXEC
|
||||
Set the
|
||||
.Xr close 2
|
||||
on
|
||||
.Xr exec 3
|
||||
flag.
|
||||
.It Dv MFD_ALLOW_SEALING
|
||||
Allow adding seals to the file descriptor using the
|
||||
.Xr fcntl 2
|
||||
.Dv F_ADD_SEALS
|
||||
command.
|
||||
.El
|
||||
.Pp
|
||||
Otherwise, the returned file descriptor behaves the same as a regular file,
|
||||
including the ability to be mapped by
|
||||
.Xr mmap 2 .
|
||||
.Sh RETURN VALUES
|
||||
If successful, the
|
||||
.Fn memfd_create
|
||||
system call returns a non-negative integer.
|
||||
On failure -1 is returned and
|
||||
.Fa errno
|
||||
is set to indicate the error.
|
||||
.Sh ERRORS
|
||||
.Fn memfd_create
|
||||
will fail if:
|
||||
.Bl -tag -width Er
|
||||
.It Bq Er EFAULT
|
||||
The argument
|
||||
.Fa name
|
||||
is
|
||||
.Dv NULL
|
||||
or points to invalid memory.
|
||||
.It Bq Er EINVAL
|
||||
The argument
|
||||
.Fa flags
|
||||
has any bits set other than
|
||||
.Dv MFD_CLOEXEC
|
||||
or
|
||||
.Dv MFD_ALLOW_SEALING .
|
||||
.It Bq Er ENAMETOOLONG
|
||||
The length of
|
||||
.Fa name
|
||||
when prefixed with
|
||||
.Dq memfd:
|
||||
would exceed
|
||||
.Dv NAME_MAX .
|
||||
.It Bq Er ENFILE
|
||||
The system file table is full.
|
||||
.El
|
||||
.Sh SEE ALSO
|
||||
.Xr fcntl 2 ,
|
||||
.Xr mmap 2 ,
|
||||
.Xr shmget 2 ,
|
||||
.Xr shm_open 3
|
||||
.Sh HISTORY
|
||||
.Fn memfd_create
|
||||
is compatible with the Linux system call of the same name that first appeared in
|
||||
Linux 3.17.
|
|
@ -1,4 +1,4 @@
|
|||
$NetBSD: syscalls.master,v 1.67 2021/12/02 04:29:48 ryo Exp $
|
||||
$NetBSD: syscalls.master,v 1.68 2023/07/10 02:31:55 christos Exp $
|
||||
|
||||
; @(#)syscalls.master 8.1 (Berkeley) 7/19/93
|
||||
|
||||
|
@ -567,6 +567,145 @@
|
|||
311 UNIMPL process_vm_writev
|
||||
312 UNIMPL kcmp
|
||||
313 UNIMPL finit_module
|
||||
314 UNIMPL sched_setattr
|
||||
315 UNIMPL sched_getattr
|
||||
316 UNIMPL renameat2
|
||||
317 UNIMPL seccomp
|
||||
318 NOARGS { ssize_t|sys||getrandom(void *buf, size_t buflen, \
|
||||
unsigned int flags); }
|
||||
319 STD { int|linux_sys||memfd_create(const char *name, \
|
||||
unsigned int flags); }
|
||||
320 UNIMPL kexec_file_load
|
||||
321 UNIMPL bpf
|
||||
322 UNIMPL execveat
|
||||
323 UNIMPL userfaultfd
|
||||
324 UNIMPL membarrier
|
||||
325 UNIMPL mlock2
|
||||
326 UNIMPL copy_file_range
|
||||
327 UNIMPL preadv2
|
||||
328 UNIMPL pwritev2
|
||||
329 UNIMPL pkey_mprotect
|
||||
330 UNIMPL pkey_alloc
|
||||
331 UNIMPL pkey_free
|
||||
332 UNIMPL statx
|
||||
333 UNIMPL io_pgetevents
|
||||
334 UNIMPL rseq
|
||||
335 UNIMPL
|
||||
336 UNIMPL
|
||||
337 UNIMPL
|
||||
338 UNIMPL
|
||||
339 UNIMPL
|
||||
340 UNIMPL
|
||||
341 UNIMPL
|
||||
342 UNIMPL
|
||||
343 UNIMPL
|
||||
344 UNIMPL
|
||||
345 UNIMPL
|
||||
346 UNIMPL
|
||||
347 UNIMPL
|
||||
348 UNIMPL
|
||||
349 UNIMPL
|
||||
350 UNIMPL
|
||||
351 UNIMPL
|
||||
352 UNIMPL
|
||||
353 UNIMPL
|
||||
354 UNIMPL
|
||||
355 UNIMPL
|
||||
356 UNIMPL
|
||||
357 UNIMPL
|
||||
358 UNIMPL
|
||||
359 UNIMPL
|
||||
360 UNIMPL
|
||||
361 UNIMPL
|
||||
362 UNIMPL
|
||||
363 UNIMPL
|
||||
364 UNIMPL
|
||||
365 UNIMPL
|
||||
366 UNIMPL
|
||||
367 UNIMPL
|
||||
368 UNIMPL
|
||||
369 UNIMPL
|
||||
370 UNIMPL
|
||||
371 UNIMPL
|
||||
372 UNIMPL
|
||||
373 UNIMPL
|
||||
374 UNIMPL
|
||||
375 UNIMPL
|
||||
376 UNIMPL
|
||||
377 UNIMPL
|
||||
378 UNIMPL
|
||||
379 UNIMPL
|
||||
380 UNIMPL
|
||||
381 UNIMPL
|
||||
382 UNIMPL
|
||||
383 UNIMPL
|
||||
384 UNIMPL
|
||||
385 UNIMPL
|
||||
386 UNIMPL
|
||||
387 UNIMPL
|
||||
388 UNIMPL
|
||||
389 UNIMPL
|
||||
390 UNIMPL
|
||||
391 UNIMPL
|
||||
392 UNIMPL
|
||||
393 UNIMPL
|
||||
394 UNIMPL
|
||||
395 UNIMPL
|
||||
396 UNIMPL
|
||||
397 UNIMPL
|
||||
398 UNIMPL
|
||||
399 UNIMPL
|
||||
400 UNIMPL
|
||||
401 UNIMPL
|
||||
402 UNIMPL
|
||||
403 UNIMPL
|
||||
404 UNIMPL
|
||||
405 UNIMPL
|
||||
406 UNIMPL
|
||||
407 UNIMPL
|
||||
408 UNIMPL
|
||||
409 UNIMPL
|
||||
410 UNIMPL
|
||||
411 UNIMPL
|
||||
412 UNIMPL
|
||||
413 UNIMPL
|
||||
414 UNIMPL
|
||||
415 UNIMPL
|
||||
416 UNIMPL
|
||||
417 UNIMPL
|
||||
418 UNIMPL
|
||||
419 UNIMPL
|
||||
420 UNIMPL
|
||||
421 UNIMPL
|
||||
422 UNIMPL
|
||||
423 UNIMPL
|
||||
424 UNIMPL pidfd_send_signal
|
||||
425 UNIMPL io_uring_setup
|
||||
426 UNIMPL io_uring_enter
|
||||
427 UNIMPL io_uring_register
|
||||
428 UNIMPL open_tree
|
||||
429 UNIMPL move_mount
|
||||
430 UNIMPL fsopen
|
||||
431 UNIMPL fsconfig
|
||||
432 UNIMPL fsmount
|
||||
433 UNIMPL fspick
|
||||
434 UNIMPL pidfd_open
|
||||
435 UNIMPL clone3
|
||||
436 UNIMPL close_range
|
||||
437 UNIMPL openat2
|
||||
438 UNIMPL pidfd_getfd
|
||||
439 UNIMPL faccessat2
|
||||
440 UNIMPL process_madvise
|
||||
441 UNIMPL epoll_pwait2
|
||||
442 UNIMPL mount_setattr
|
||||
443 UNIMPL quotactl_fd
|
||||
444 UNIMPL landlock_create_ruleset
|
||||
445 UNIMPL landlock_add_rule
|
||||
446 UNIMPL landlock_restrict_self
|
||||
447 UNIMPL memfd_secret
|
||||
448 UNIMPL process_mrelease
|
||||
449 UNIMPL futex_waitv
|
||||
450 UNIMPL set_mempolicy_home_node
|
||||
|
||||
; we want a "nosys" syscall, we'll just add an extra entry for it.
|
||||
314 STD { int|linux_sys||nosys(void); }
|
||||
451 STD { int|linux_sys||nosys(void); }
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: linux_fcntl.h,v 1.20 2021/11/25 02:27:08 ryo Exp $ */
|
||||
/* $NetBSD: linux_fcntl.h,v 1.21 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1995, 1998 The NetBSD Foundation, Inc.
|
||||
|
@ -100,6 +100,8 @@ struct linux_flock64 {
|
|||
#define LINUX_F_DUPFD_CLOEXEC (LINUX_F_SPECIFIC_BASE + 6)
|
||||
#define LINUX_F_SETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 7)
|
||||
#define LINUX_F_GETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 8)
|
||||
#define LINUX_F_ADD_SEALS (LINUX_F_SPECIFIC_BASE + 9)
|
||||
#define LINUX_F_GET_SEALS (LINUX_F_SPECIFIC_BASE + 10)
|
||||
|
||||
/*
|
||||
* We have to have 4 copies of the code that converts linux fcntl() file
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: linux_file.c,v 1.122 2021/11/25 03:08:04 ryo Exp $ */
|
||||
/* $NetBSD: linux_file.c,v 1.123 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc.
|
||||
|
@ -35,7 +35,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.122 2021/11/25 03:08:04 ryo Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.123 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
@ -440,6 +440,14 @@ linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_
|
|||
cmd = F_DUPFD_CLOEXEC;
|
||||
break;
|
||||
|
||||
case LINUX_F_ADD_SEALS:
|
||||
cmd = F_ADD_SEALS;
|
||||
break;
|
||||
|
||||
case LINUX_F_GET_SEALS:
|
||||
cmd = F_GET_SEALS;
|
||||
break;
|
||||
|
||||
default:
|
||||
return EOPNOTSUPP;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: linux_misc.c,v 1.256 2021/12/02 04:29:48 ryo Exp $ */
|
||||
/* $NetBSD: linux_misc.c,v 1.257 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1995, 1998, 1999, 2008 The NetBSD Foundation, Inc.
|
||||
|
@ -57,7 +57,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: linux_misc.c,v 1.256 2021/12/02 04:29:48 ryo Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: linux_misc.c,v 1.257 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
@ -1681,3 +1681,66 @@ linux_sys_eventfd2(struct lwp *l, const struct linux_sys_eventfd2_args *uap,
|
|||
return linux_do_eventfd2(l, SCARG(uap, initval), SCARG(uap, flags),
|
||||
retval);
|
||||
}
|
||||
|
||||
/*
 * Linux memfd_create(2) flag bits.  All constants are unsigned so that
 * combining and shifting them never overflows a signed int.
 */
#define LINUX_MFD_CLOEXEC		0x0001U
#define LINUX_MFD_ALLOW_SEALING		0x0002U
#define LINUX_MFD_HUGETLB		0x0004U
#define LINUX_MFD_NOEXEC_SEAL		0x0008U
#define LINUX_MFD_EXEC			0x0010U
/* Six-bit field occupying bits 26..31; 0x3f << 26 does not fit in an int. */
#define LINUX_MFD_HUGE_FLAGS		(0x3fU << 26)

/* Every bit accepted without EINVAL by linux_sys_memfd_create(). */
#define LINUX_MFD_ALL_FLAGS		(LINUX_MFD_CLOEXEC|LINUX_MFD_ALLOW_SEALING \
					|LINUX_MFD_HUGETLB|LINUX_MFD_NOEXEC_SEAL \
					|LINUX_MFD_EXEC|LINUX_MFD_HUGE_FLAGS)
/* The subset that is actually forwarded to the native system call. */
#define LINUX_MFD_KNOWN_FLAGS		(LINUX_MFD_CLOEXEC|LINUX_MFD_ALLOW_SEALING)

/* Linux has a stricter limit for name size than NAME_MAX. */
#define LINUX_MFD_NAME_MAX		249
|
||||
|
||||
/*
|
||||
* memfd_create(2). Do some error checking and then call NetBSD's
|
||||
* version.
|
||||
*/
|
||||
int
|
||||
linux_sys_memfd_create(struct lwp *l,
|
||||
const struct linux_sys_memfd_create_args *uap, register_t *retval)
|
||||
{
|
||||
/* {
|
||||
syscallarg(const char *) name;
|
||||
syscallarg(unsigned int) flags;
|
||||
} */
|
||||
int error;
|
||||
char *pbuf;
|
||||
struct sys_memfd_create_args muap;
|
||||
const unsigned int lflags = SCARG(uap, flags);
|
||||
|
||||
KASSERT(LINUX_MFD_NAME_MAX < NAME_MAX); /* sanity check */
|
||||
|
||||
if (lflags & ~LINUX_MFD_ALL_FLAGS)
|
||||
return EINVAL;
|
||||
if ((lflags & LINUX_MFD_HUGE_FLAGS) != 0 &&
|
||||
(lflags & LINUX_MFD_HUGETLB) == 0)
|
||||
return EINVAL;
|
||||
if ((lflags & LINUX_MFD_HUGETLB) && (lflags & LINUX_MFD_ALLOW_SEALING))
|
||||
return EINVAL;
|
||||
|
||||
/* Linux has a stricter limit for name size */
|
||||
pbuf = PNBUF_GET();
|
||||
error = copyinstr(SCARG(uap, name), pbuf, LINUX_MFD_NAME_MAX+1, NULL);
|
||||
PNBUF_PUT(pbuf);
|
||||
pbuf = NULL;
|
||||
if (error != 0) {
|
||||
if (error == ENAMETOOLONG)
|
||||
error = EINVAL;
|
||||
return error;
|
||||
}
|
||||
|
||||
if (lflags & ~LINUX_MFD_KNOWN_FLAGS) {
|
||||
DPRINTF(("linux_sys_memfd_create: ignored flags %x\n",
|
||||
lflags & ~LINUX_MFD_KNOWN_FLAGS));
|
||||
}
|
||||
|
||||
SCARG(&muap, name) = SCARG(uap, name);
|
||||
SCARG(&muap, flags) = lflags & LINUX_MFD_KNOWN_FLAGS;
|
||||
|
||||
return sys_memfd_create(l, &muap, retval);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: linux_sysctl.c,v 1.47 2021/09/23 06:56:27 ryo Exp $ */
|
||||
/* $NetBSD: linux_sysctl.c,v 1.48 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2003, 2008 The NetBSD Foundation, Inc.
|
||||
|
@ -34,7 +34,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: linux_sysctl.c,v 1.47 2021/09/23 06:56:27 ryo Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: linux_sysctl.c,v 1.48 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
@ -57,8 +57,8 @@ __KERNEL_RCSID(0, "$NetBSD: linux_sysctl.c,v 1.47 2021/09/23 06:56:27 ryo Exp $"
|
|||
#include <compat/linux/common/linux_machdep.h>
|
||||
|
||||
char linux_sysname[128] = "Linux";
|
||||
char linux_release[128] = "3.11.6";
|
||||
char linux_version[128] = "#1 SMP PREEMPT Thu Oct 24 16:23:02 UTC 2013";
|
||||
char linux_release[128] = "6.3.10";
|
||||
char linux_version[128] = "#1 SMP PREEMPT_DYNAMIC Wed Jun 28 18:34:30 UTC 2023";
|
||||
|
||||
struct sysctlnode linux_sysctl_root = {
|
||||
.sysctl_flags = SYSCTL_VERSION|
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: sys_descrip.c,v 1.47 2023/05/14 09:29:58 riastradh Exp $ */
|
||||
/* $NetBSD: sys_descrip.c,v 1.48 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
|
||||
|
@ -67,7 +67,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.47 2023/05/14 09:29:58 riastradh Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.48 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
@ -315,26 +315,6 @@ out: if (fp)
|
|||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
do_fcntl_getpath(struct lwp *l, file_t *fp, char *upath)
|
||||
{
|
||||
char *kpath;
|
||||
int error;
|
||||
|
||||
if (fp->f_type != DTYPE_VNODE)
|
||||
return EOPNOTSUPP;
|
||||
|
||||
kpath = PNBUF_GET();
|
||||
|
||||
error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode, l, l->l_proc);
|
||||
if (!error)
|
||||
error = copyoutstr(kpath, upath, MAXPATHLEN, NULL);
|
||||
|
||||
PNBUF_PUT(kpath);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
* The file control system call.
|
||||
*/
|
||||
|
@ -350,6 +330,7 @@ sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
|
|||
filedesc_t *fdp;
|
||||
fdtab_t *dt;
|
||||
file_t *fp;
|
||||
char *kpath;
|
||||
struct flock fl;
|
||||
bool cloexec = false;
|
||||
|
||||
|
@ -486,7 +467,30 @@ sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
|
|||
break;
|
||||
|
||||
case F_GETPATH:
|
||||
error = do_fcntl_getpath(l, fp, SCARG(uap, arg));
|
||||
kpath = PNBUF_GET();
|
||||
|
||||
/* vnodes need extra context, so are handled separately */
|
||||
if (fp->f_type == DTYPE_VNODE)
|
||||
error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode,
|
||||
l, l->l_proc);
|
||||
else
|
||||
error = (*fp->f_ops->fo_fcntl)(fp, F_GETPATH, kpath);
|
||||
|
||||
if (error == 0)
|
||||
error = copyoutstr(kpath, SCARG(uap, arg), MAXPATHLEN,
|
||||
NULL);
|
||||
|
||||
PNBUF_PUT(kpath);
|
||||
break;
|
||||
|
||||
case F_ADD_SEALS:
|
||||
tmp = (int)(uintptr_t) SCARG(uap, arg);
|
||||
error = (*fp->f_ops->fo_fcntl)(fp, F_ADD_SEALS, &tmp);
|
||||
break;
|
||||
|
||||
case F_GET_SEALS:
|
||||
error = (*fp->f_ops->fo_fcntl)(fp, F_GET_SEALS, &tmp);
|
||||
*retval = tmp;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -0,0 +1,408 @@
|
|||
/* $NetBSD: sys_memfd.c,v 1.1 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2023 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Theodore Preduta.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: sys_memfd.c,v 1.1 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <sys/file.h>
|
||||
#include <sys/filedesc.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/syscallargs.h>
|
||||
|
||||
#include <uvm/uvm_extern.h>
|
||||
#include <uvm/uvm_object.h>
|
||||
|
||||
#define F_SEAL_ANY_WRITE (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
|
||||
#define MFD_KNOWN_SEALS (F_SEAL_SEAL|F_SEAL_SHRINK|F_SEAL_GROW \
|
||||
|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
|
||||
|
||||
static const char memfd_prefix[] = "memfd:";
|
||||
|
||||
static int memfd_read(file_t *, off_t *, struct uio *, kauth_cred_t, int);
|
||||
static int memfd_write(file_t *, off_t *, struct uio *, kauth_cred_t, int);
|
||||
static int memfd_ioctl(file_t *, u_long, void *);
|
||||
static int memfd_fcntl(file_t *, u_int, void *);
|
||||
static int memfd_stat(file_t *, struct stat *);
|
||||
static int memfd_close(file_t *);
|
||||
static int memfd_mmap(file_t *, off_t *, size_t, int, int *, int *,
|
||||
struct uvm_object **, int *);
|
||||
static int memfd_seek(file_t *, off_t, int, off_t *, int);
|
||||
static int memfd_truncate(file_t *, off_t);
|
||||
|
||||
static const struct fileops memfd_fileops = {
|
||||
.fo_name = "memfd",
|
||||
.fo_read = memfd_read,
|
||||
.fo_write = memfd_write,
|
||||
.fo_ioctl = memfd_ioctl,
|
||||
.fo_fcntl = memfd_fcntl,
|
||||
.fo_poll = fnullop_poll,
|
||||
.fo_stat = memfd_stat,
|
||||
.fo_close = memfd_close,
|
||||
.fo_kqfilter = fnullop_kqfilter,
|
||||
.fo_restart = fnullop_restart,
|
||||
.fo_mmap = memfd_mmap,
|
||||
.fo_seek = memfd_seek,
|
||||
.fo_fpathconf = (void *)eopnotsupp,
|
||||
.fo_posix_fadvise = (void *)eopnotsupp,
|
||||
.fo_truncate = memfd_truncate,
|
||||
};
|
||||
|
||||
/*
|
||||
* memfd_create(2). Creat a file descriptor associated with anonymous
|
||||
* memory.
|
||||
*/
|
||||
int
|
||||
sys_memfd_create(struct lwp *l, const struct sys_memfd_create_args *uap,
|
||||
register_t *retval)
|
||||
{
|
||||
/* {
|
||||
syscallarg(const char *) name;
|
||||
syscallarg(unsigned int) flags;
|
||||
} */
|
||||
int error, fd;
|
||||
file_t *fp;
|
||||
struct memfd *mfd;
|
||||
struct proc *p = l->l_proc;
|
||||
const unsigned int flags = SCARG(uap, flags);
|
||||
|
||||
KASSERT(NAME_MAX - sizeof(memfd_prefix) > 0); /* sanity check */
|
||||
|
||||
if (flags & ~(MFD_CLOEXEC|MFD_ALLOW_SEALING))
|
||||
return EINVAL;
|
||||
|
||||
mfd = kmem_zalloc(sizeof(*mfd), KM_SLEEP);
|
||||
mfd->mfd_size = 0;
|
||||
mfd->mfd_uobj = uao_create(INT64_MAX - PAGE_SIZE, 0); /* same as tmpfs */
|
||||
mutex_init(&mfd->mfd_lock, MUTEX_DEFAULT, IPL_NONE);
|
||||
|
||||
strcpy(mfd->mfd_name, memfd_prefix);
|
||||
error = copyinstr(SCARG(uap, name),
|
||||
&mfd->mfd_name[sizeof(memfd_prefix) - 1],
|
||||
sizeof(mfd->mfd_name) - sizeof(memfd_prefix), NULL);
|
||||
if (error != 0)
|
||||
goto leave;
|
||||
|
||||
getnanotime(&mfd->mfd_btime);
|
||||
|
||||
if ((flags & MFD_ALLOW_SEALING) == 0)
|
||||
mfd->mfd_seals |= F_SEAL_SEAL;
|
||||
|
||||
error = fd_allocfile(&fp, &fd);
|
||||
if (error != 0)
|
||||
goto leave;
|
||||
|
||||
fp->f_flag = FREAD|FWRITE;
|
||||
fp->f_type = DTYPE_MEMFD;
|
||||
fp->f_ops = &memfd_fileops;
|
||||
fp->f_memfd = mfd;
|
||||
fd_set_exclose(l, fd, (flags & MFD_CLOEXEC) != 0);
|
||||
fd_affix(p, fp, fd);
|
||||
|
||||
*retval = fd;
|
||||
return 0;
|
||||
|
||||
leave:
|
||||
uao_detach(mfd->mfd_uobj);
|
||||
kmem_free(mfd, sizeof(*mfd));
|
||||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
|
||||
int flags)
|
||||
{
|
||||
int error;
|
||||
vsize_t todo;
|
||||
struct memfd *mfd = fp->f_memfd;
|
||||
|
||||
if (offp == &fp->f_offset)
|
||||
mutex_enter(&fp->f_lock);
|
||||
|
||||
if (*offp < 0) {
|
||||
error = EINVAL;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
/* Trying to read past the end does nothing. */
|
||||
if (*offp >= mfd->mfd_size) {
|
||||
error = 0;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
uio->uio_offset = *offp;
|
||||
todo = MIN(uio->uio_resid, mfd->mfd_size - *offp);
|
||||
error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL,
|
||||
UBC_READ|UBC_PARTIALOK);
|
||||
|
||||
leave:
|
||||
if (offp == &fp->f_offset)
|
||||
mutex_exit(&fp->f_lock);
|
||||
|
||||
getnanotime(&mfd->mfd_atime);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_write(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
|
||||
int flags)
|
||||
{
|
||||
int error;
|
||||
vsize_t todo;
|
||||
struct memfd *mfd = fp->f_memfd;
|
||||
|
||||
if (mfd->mfd_seals & F_SEAL_ANY_WRITE)
|
||||
return EPERM;
|
||||
|
||||
if (offp == &fp->f_offset)
|
||||
mutex_enter(&fp->f_lock);
|
||||
|
||||
if (*offp < 0) {
|
||||
error = EINVAL;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
uio->uio_offset = *offp;
|
||||
todo = uio->uio_resid;
|
||||
|
||||
if (mfd->mfd_seals & F_SEAL_GROW) {
|
||||
if (*offp >= mfd->mfd_size) {
|
||||
error = EPERM;
|
||||
goto leave;
|
||||
}
|
||||
|
||||
/* Truncate the write to fit in mfd_size */
|
||||
if (*offp + uio->uio_resid >= mfd->mfd_size)
|
||||
todo = mfd->mfd_size - *offp;
|
||||
} else if (*offp + uio->uio_resid >= mfd->mfd_size) {
|
||||
/* Grow to accommodate the write request. */
|
||||
error = memfd_truncate(fp, *offp + uio->uio_resid);
|
||||
if (error != 0)
|
||||
goto leave;
|
||||
}
|
||||
|
||||
error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL,
|
||||
UBC_WRITE|UBC_PARTIALOK);
|
||||
|
||||
getnanotime(&mfd->mfd_mtime);
|
||||
|
||||
leave:
|
||||
if (offp == &fp->f_offset)
|
||||
mutex_exit(&fp->f_lock);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_ioctl(file_t *fp, u_long cmd, void *data)
|
||||
{
|
||||
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_fcntl(file_t *fp, u_int cmd, void *data)
|
||||
{
|
||||
struct memfd *mfd = fp->f_memfd;
|
||||
|
||||
switch (cmd) {
|
||||
case F_ADD_SEALS:
|
||||
if (mfd->mfd_seals & F_SEAL_SEAL)
|
||||
return EPERM;
|
||||
|
||||
if (*(int *)data & ~MFD_KNOWN_SEALS)
|
||||
return EINVAL;
|
||||
|
||||
/*
|
||||
* Can only add F_SEAL_WRITE if there are no currently
|
||||
* open mmaps.
|
||||
*
|
||||
* XXX should only disallow if there are no currently
|
||||
* open mmaps with PROT_WRITE.
|
||||
*/
|
||||
if ((mfd->mfd_seals & F_SEAL_WRITE) == 0 &&
|
||||
(*(int *)data & F_SEAL_WRITE) != 0 &&
|
||||
mfd->mfd_uobj->uo_refs > 1)
|
||||
return EBUSY;
|
||||
|
||||
mfd->mfd_seals |= *(int *)data;
|
||||
return 0;
|
||||
|
||||
case F_GET_SEALS:
|
||||
*(int *)data = mfd->mfd_seals;
|
||||
return 0;
|
||||
|
||||
default:
|
||||
return EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_stat(file_t *fp, struct stat *st)
|
||||
{
|
||||
struct memfd *mfd = fp->f_memfd;
|
||||
|
||||
memset(st, 0, sizeof(*st));
|
||||
st->st_uid = kauth_cred_geteuid(fp->f_cred);
|
||||
st->st_gid = kauth_cred_getegid(fp->f_cred);
|
||||
st->st_size = mfd->mfd_size;
|
||||
|
||||
st->st_mode = S_IREAD;
|
||||
if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) == 0)
|
||||
st->st_mode |= S_IWRITE;
|
||||
|
||||
st->st_birthtimespec = mfd->mfd_btime;
|
||||
st->st_ctimespec = mfd->mfd_mtime;
|
||||
st->st_atimespec = mfd->mfd_atime;
|
||||
st->st_mtimespec = mfd->mfd_mtime;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_close(file_t *fp)
|
||||
{
|
||||
struct memfd *mfd = fp->f_memfd;
|
||||
|
||||
uao_detach(mfd->mfd_uobj);
|
||||
mutex_destroy(&mfd->mfd_lock);
|
||||
|
||||
kmem_free(mfd, sizeof(*mfd));
|
||||
fp->f_memfd = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
|
||||
int *advicep, struct uvm_object **uobjp, int *maxprotp)
|
||||
{
|
||||
struct memfd *mfd = fp->f_memfd;
|
||||
|
||||
/* uvm_mmap guarantees page-aligned offset and size. */
|
||||
KASSERT(*offp == round_page(*offp));
|
||||
KASSERT(size == round_page(size));
|
||||
KASSERT(size > 0);
|
||||
|
||||
if (*offp < 0)
|
||||
return EINVAL;
|
||||
if (*offp + size > mfd->mfd_size)
|
||||
return EINVAL;
|
||||
|
||||
if ((mfd->mfd_seals & F_SEAL_ANY_WRITE) &&
|
||||
(prot & VM_PROT_WRITE) && (*flagsp & MAP_PRIVATE) == 0)
|
||||
return EPERM;
|
||||
|
||||
uao_reference(fp->f_memfd->mfd_uobj);
|
||||
*uobjp = fp->f_memfd->mfd_uobj;
|
||||
|
||||
*maxprotp = prot;
|
||||
*advicep = UVM_ADV_RANDOM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_seek(file_t *fp, off_t delta, int whence, off_t *newoffp,
|
||||
int flags)
|
||||
{
|
||||
off_t newoff;
|
||||
int error;
|
||||
|
||||
switch (whence) {
|
||||
case SEEK_CUR:
|
||||
newoff = fp->f_offset + delta;
|
||||
break;
|
||||
|
||||
case SEEK_END:
|
||||
newoff = fp->f_memfd->mfd_size + delta;
|
||||
break;
|
||||
|
||||
case SEEK_SET:
|
||||
newoff = delta;
|
||||
break;
|
||||
|
||||
default:
|
||||
error = EINVAL;
|
||||
return error;
|
||||
}
|
||||
|
||||
if (newoffp)
|
||||
*newoffp = newoff;
|
||||
if (flags & FOF_UPDATE_OFFSET)
|
||||
fp->f_offset = newoff;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
memfd_truncate(file_t *fp, off_t length)
|
||||
{
|
||||
struct memfd *mfd = fp->f_memfd;
|
||||
int error = 0;
|
||||
voff_t start, end;
|
||||
|
||||
if (length < 0)
|
||||
return EINVAL;
|
||||
if (length == mfd->mfd_size)
|
||||
return 0;
|
||||
|
||||
if ((mfd->mfd_seals & F_SEAL_SHRINK) && length < mfd->mfd_size)
|
||||
return EPERM;
|
||||
if ((mfd->mfd_seals & F_SEAL_GROW) && length > mfd->mfd_size)
|
||||
return EPERM;
|
||||
|
||||
mutex_enter(&mfd->mfd_lock);
|
||||
|
||||
if (length > mfd->mfd_size)
|
||||
ubc_zerorange(mfd->mfd_uobj, mfd->mfd_size,
|
||||
length - mfd->mfd_size, 0);
|
||||
else {
|
||||
/* length < mfd->mfd_size, so try to get rid of excess pages */
|
||||
start = round_page(length);
|
||||
end = round_page(mfd->mfd_size);
|
||||
|
||||
if (start < end) { /* we actually have pages to remove */
|
||||
rw_enter(mfd->mfd_uobj->vmobjlock, RW_WRITER);
|
||||
error = (*mfd->mfd_uobj->pgops->pgo_put)(mfd->mfd_uobj,
|
||||
start, end, PGO_FREE);
|
||||
/* pgo_put drops vmobjlock */
|
||||
}
|
||||
}
|
||||
|
||||
getnanotime(&mfd->mfd_mtime);
|
||||
mfd->mfd_size = length;
|
||||
mutex_exit(&mfd->mfd_lock);
|
||||
return error;
|
||||
}
|
|
@ -1,4 +1,4 @@
|
|||
$NetBSD: syscalls.master,v 1.309 2021/11/01 05:07:17 thorpej Exp $
|
||||
$NetBSD: syscalls.master,v 1.310 2023/07/10 02:33:04 christos Exp $
|
||||
|
||||
; @(#)syscalls.master 8.2 (Berkeley) 1/13/94
|
||||
|
||||
|
@ -1049,3 +1049,5 @@
|
|||
498 STD { int|sys||__acl_aclcheck_fd(int filedes, \
|
||||
acl_type_t type, struct acl *aclp); }
|
||||
499 STD RUMP { long|sys||lpathconf(const char *path, int name); }
|
||||
500 STD { int|sys||memfd_create(const char *name, \
|
||||
unsigned int flags); }
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: vfs_syscalls.c,v 1.559 2023/04/29 06:34:20 riastradh Exp $ */
|
||||
/* $NetBSD: vfs_syscalls.c,v 1.560 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc.
|
||||
|
@ -70,7 +70,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.559 2023/04/29 06:34:20 riastradh Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.560 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#ifdef _KERNEL_OPT
|
||||
#include "opt_fileassoc.h"
|
||||
|
@ -4149,34 +4149,19 @@ sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *r
|
|||
syscallarg(int) pad;
|
||||
syscallarg(off_t) length;
|
||||
} */
|
||||
struct vattr vattr;
|
||||
struct vnode *vp;
|
||||
file_t *fp;
|
||||
int error;
|
||||
int error, fd = SCARG(uap, fd);
|
||||
|
||||
if (SCARG(uap, length) < 0)
|
||||
return EINVAL;
|
||||
fp = fd_getfile(fd);
|
||||
if (fp == NULL)
|
||||
return EBADF;
|
||||
if (fp->f_ops->fo_truncate == NULL)
|
||||
error = EOPNOTSUPP;
|
||||
else
|
||||
error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length));
|
||||
|
||||
/* fd_getvnode() will use the descriptor for us */
|
||||
if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
|
||||
return (error);
|
||||
if ((fp->f_flag & FWRITE) == 0) {
|
||||
error = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
vp = fp->f_vnode;
|
||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
if (vp->v_type == VDIR)
|
||||
error = EISDIR;
|
||||
else if ((error = vn_writechk(vp)) == 0) {
|
||||
vattr_null(&vattr);
|
||||
vattr.va_size = SCARG(uap, length);
|
||||
error = VOP_SETATTR(vp, &vattr, fp->f_cred);
|
||||
}
|
||||
VOP_UNLOCK(vp);
|
||||
out:
|
||||
fd_putfile(SCARG(uap, fd));
|
||||
return (error);
|
||||
fd_putfile(fd);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: vfs_vnops.c,v 1.241 2023/04/22 13:53:02 riastradh Exp $ */
|
||||
/* $NetBSD: vfs_vnops.c,v 1.242 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2009 The NetBSD Foundation, Inc.
|
||||
|
@ -66,7 +66,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.241 2023/04/22 13:53:02 riastradh Exp $");
|
||||
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.242 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#include "veriexec.h"
|
||||
|
||||
|
@ -125,6 +125,7 @@ static int vn_seek(struct file *, off_t, int, off_t *, int);
|
|||
static int vn_advlock(struct file *, void *, int, struct flock *, int);
|
||||
static int vn_fpathconf(struct file *, int, register_t *);
|
||||
static int vn_posix_fadvise(struct file *, off_t, off_t, int);
|
||||
static int vn_truncate(file_t *, off_t);
|
||||
|
||||
const struct fileops vnops = {
|
||||
.fo_name = "vn",
|
||||
|
@ -142,6 +143,7 @@ const struct fileops vnops = {
|
|||
.fo_advlock = vn_advlock,
|
||||
.fo_fpathconf = vn_fpathconf,
|
||||
.fo_posix_fadvise = vn_posix_fadvise,
|
||||
.fo_truncate = vn_truncate,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -1331,6 +1333,33 @@ vn_posix_fadvise(struct file *fp, off_t offset, off_t len, int advice)
|
|||
return error;
|
||||
}
|
||||
|
||||
static int
|
||||
vn_truncate(file_t *fp, off_t length)
|
||||
{
|
||||
struct vattr vattr;
|
||||
struct vnode *vp;
|
||||
int error = 0;
|
||||
|
||||
if (length < 0)
|
||||
return EINVAL;
|
||||
|
||||
if ((fp->f_flag & FWRITE) == 0)
|
||||
return EINVAL;
|
||||
vp = fp->f_vnode;
|
||||
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
|
||||
if (vp->v_type == VDIR)
|
||||
error = EISDIR;
|
||||
else if ((error = vn_writechk(vp)) == 0) {
|
||||
vattr_null(&vattr);
|
||||
vattr.va_size = length;
|
||||
error = VOP_SETATTR(vp, &vattr, fp->f_cred);
|
||||
}
|
||||
VOP_UNLOCK(vp);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Check that the vnode is still valid, and if so
|
||||
* acquire requested lock.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: fcntl.h,v 1.54 2020/03/30 20:17:42 kamil Exp $ */
|
||||
/* $NetBSD: fcntl.h,v 1.55 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1983, 1990, 1993
|
||||
|
@ -200,6 +200,8 @@
|
|||
#define F_GETNOSIGPIPE 13 /* get SIGPIPE disposition */
|
||||
#define F_SETNOSIGPIPE 14 /* set SIGPIPE disposition */
|
||||
#define F_GETPATH 15 /* get pathname associated with fd */
|
||||
#define F_ADD_SEALS 16 /* set seals */
|
||||
#define F_GET_SEALS 17 /* get seals */
|
||||
#endif
|
||||
|
||||
/* file descriptor flags (F_GETFD, F_SETFD) */
|
||||
|
@ -215,6 +217,15 @@
|
|||
#define F_POSIX 0x040 /* Use POSIX semantics for lock */
|
||||
#endif
|
||||
|
||||
/* types of seals (F_ADD_SEALS, F_GET_SEALS) */
|
||||
#if defined(_NETBSD_SOURCE)
|
||||
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
|
||||
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
|
||||
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
|
||||
#define F_SEAL_WRITE 0x0008 /* prevent writes */
|
||||
#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
|
||||
#endif
|
||||
|
||||
/* Constants for fcntl's passed to the underlying fs - like ioctl's. */
|
||||
#if defined(_NETBSD_SOURCE)
|
||||
#define F_PARAM_MASK 0xfff
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: file.h,v 1.92 2023/04/22 13:53:02 riastradh Exp $ */
|
||||
/* $NetBSD: file.h,v 1.93 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2009 The NetBSD Foundation, Inc.
|
||||
|
@ -103,6 +103,7 @@ struct fileops {
|
|||
int (*fo_fpathconf) (struct file *, int, register_t *);
|
||||
int (*fo_posix_fadvise)
|
||||
(struct file *, off_t, off_t, int);
|
||||
int (*fo_truncate) (struct file *, off_t);
|
||||
};
|
||||
|
||||
union file_data {
|
||||
|
@ -121,6 +122,7 @@ union file_data {
|
|||
struct mqueue *fd_mq; // DTYPE_MQUEUE
|
||||
struct ksem *fd_ks; // DTYPE_SEM
|
||||
struct iscsifd *fd_iscsi; // DTYPE_MISC (iscsi)
|
||||
struct memfd *fd_memfd; // DTYPE_MEMFD
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -160,6 +162,7 @@ struct file {
|
|||
#define f_ksem f_undata.fd_ks
|
||||
#define f_eventfd f_undata.fd_eventfd
|
||||
#define f_timerfd f_undata.fd_timerfd
|
||||
#define f_memfd f_undata.fd_memfd
|
||||
|
||||
#define f_rndctx f_undata.fd_rndctx
|
||||
#define f_audioctx f_undata.fd_audioctx
|
||||
|
@ -184,10 +187,11 @@ struct file {
|
|||
#define DTYPE_SEM 8 /* semaphore */
|
||||
#define DTYPE_EVENTFD 9 /* eventfd */
|
||||
#define DTYPE_TIMERFD 10 /* timerfd */
|
||||
#define DTYPE_MEMFD 11 /* memfd */
|
||||
|
||||
#define DTYPE_NAMES \
|
||||
"0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue", \
|
||||
"semaphore", "eventfd", "timerfd"
|
||||
"semaphore", "eventfd", "timerfd", "memfd"
|
||||
|
||||
#ifdef _KERNEL
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: mman.h,v 1.62 2019/12/06 19:37:43 christos Exp $ */
|
||||
/* $NetBSD: mman.h,v 1.63 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1982, 1986, 1993
|
||||
|
@ -212,7 +212,13 @@ typedef __off_t off_t; /* file offset */
|
|||
implemented in UVM */
|
||||
#define MAP_INHERIT_ZERO 4 /* zero in child */
|
||||
#define MAP_INHERIT_DEFAULT MAP_INHERIT_COPY
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Flags to memfd_create
|
||||
*/
|
||||
#define MFD_CLOEXEC 0x1U
|
||||
#define MFD_ALLOW_SEALING 0x2U
|
||||
#endif /* _NETBSD_SOURCE */
|
||||
|
||||
#ifndef _KERNEL
|
||||
|
||||
|
@ -234,12 +240,31 @@ int madvise(void *, size_t, int);
|
|||
int mincore(void *, size_t, char *);
|
||||
int minherit(void *, size_t, int);
|
||||
void * mremap(void *, size_t, void *, size_t, int);
|
||||
int memfd_create(const char *, unsigned int);
|
||||
#endif
|
||||
int posix_madvise(void *, size_t, int);
|
||||
int shm_open(const char *, int, mode_t);
|
||||
int shm_unlink(const char *);
|
||||
__END_DECLS
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/syslimits.h> /* for NAME_MAX */
|
||||
#include <sys/timespec.h> /* for struct timespec */
|
||||
#include <sys/mutex.h> /* for kmutex_t */
|
||||
|
||||
struct memfd {
|
||||
char mfd_name[NAME_MAX+1];
|
||||
struct uvm_object *mfd_uobj;
|
||||
size_t mfd_size;
|
||||
int mfd_seals;
|
||||
kmutex_t mfd_lock; /* for truncate */
|
||||
|
||||
struct timespec mfd_btime;
|
||||
struct timespec mfd_atime;
|
||||
struct timespec mfd_mtime;
|
||||
};
|
||||
|
||||
#endif /* !_KERNEL */
|
||||
|
||||
#endif /* !_SYS_MMAN_H_ */
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: fstat.c,v 1.117 2022/10/28 05:27:16 ozaki-r Exp $ */
|
||||
/* $NetBSD: fstat.c,v 1.118 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 1988, 1993
|
||||
|
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1988, 1993\
|
|||
#if 0
|
||||
static char sccsid[] = "@(#)fstat.c 8.3 (Berkeley) 5/2/95";
|
||||
#else
|
||||
__RCSID("$NetBSD: fstat.c,v 1.117 2022/10/28 05:27:16 ozaki-r Exp $");
|
||||
__RCSID("$NetBSD: fstat.c,v 1.118 2023/07/10 02:31:55 christos Exp $");
|
||||
#endif
|
||||
#endif /* not lint */
|
||||
|
||||
|
@ -548,6 +548,7 @@ ftrans(fdfile_t *fp, int i)
|
|||
case DTYPE_CRYPTO:
|
||||
case DTYPE_MQUEUE:
|
||||
case DTYPE_SEM:
|
||||
case DTYPE_MEMFD:
|
||||
if (checkfile == 0)
|
||||
misctrans(&file, i);
|
||||
break;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* $NetBSD: misc.c,v 1.24 2020/09/13 04:14:48 isaki Exp $ */
|
||||
/* $NetBSD: misc.c,v 1.25 2023/07/10 02:31:55 christos Exp $ */
|
||||
|
||||
/*-
|
||||
* Copyright (c) 2008 The NetBSD Foundation, Inc.
|
||||
|
@ -30,7 +30,7 @@
|
|||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__RCSID("$NetBSD: misc.c,v 1.24 2020/09/13 04:14:48 isaki Exp $");
|
||||
__RCSID("$NetBSD: misc.c,v 1.25 2023/07/10 02:31:55 christos Exp $");
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <sys/param.h>
|
||||
|
@ -56,6 +56,9 @@ __RCSID("$NetBSD: misc.c,v 1.24 2020/09/13 04:14:48 isaki Exp $");
|
|||
#undef _KERNEL
|
||||
#include <sys/cprng.h>
|
||||
#include <sys/vnode.h>
|
||||
#define _KERNEL
|
||||
#include <sys/mman.h>
|
||||
#undef _KERNEL
|
||||
#include <sys/mount.h>
|
||||
|
||||
#include <net/bpfdesc.h>
|
||||
|
@ -110,7 +113,9 @@ static struct nlist nl[] = {
|
|||
{ .n_name = "audio_fileops" },
|
||||
#define NL_PAD 19
|
||||
{ .n_name = "pad_fileops" },
|
||||
#define NL_MAX 20
|
||||
#define NL_MEMFD 20
|
||||
{ .n_name = "memfd_fileops" },
|
||||
#define NL_MAX 21
|
||||
{ .n_name = NULL }
|
||||
};
|
||||
|
||||
|
@ -263,6 +268,40 @@ p_audio(struct file *f)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Print one seal name if its bit is present in the seal mask.
 *
 *	seen	non-zero if an earlier seal has already been printed, in
 *		which case a "|" separator is emitted first
 *	all	the full seal mask
 *	target	the bit to test for
 *	name	the text to print for that bit
 *
 * Returns non-zero if any seal has been printed so far, counting this
 * call.
 */
static int
p_memfd_seal(int seen, int all, int target, const char *name)
{
	if ((all & target) == 0)
		return seen != 0;

	(void)printf("%s%s", (seen ? "|" : ""), name);
	return 1;
}
|
||||
|
||||
static int
|
||||
p_memfd(struct file *f)
|
||||
{
|
||||
int seal_yet = 0;
|
||||
struct memfd mfd;
|
||||
|
||||
if (!KVM_READ(f->f_data, &mfd, sizeof(mfd))) {
|
||||
dprintf("can't read memfd at %p for pid %d", f->f_data, Pid);
|
||||
return 0;
|
||||
}
|
||||
(void)printf("* %s, seals=", mfd.mfd_name);
|
||||
if (mfd.mfd_seals == 0)
|
||||
(void)printf("0");
|
||||
else {
|
||||
seal_yet = p_memfd_seal(seal_yet, mfd.mfd_seals, F_SEAL_SEAL, "F_SEAL_SEAL");
|
||||
seal_yet = p_memfd_seal(seal_yet, mfd.mfd_seals, F_SEAL_SHRINK, "F_SEAL_SHRINK");
|
||||
seal_yet = p_memfd_seal(seal_yet, mfd.mfd_seals, F_SEAL_GROW, "F_SEAL_GROW");
|
||||
seal_yet = p_memfd_seal(seal_yet, mfd.mfd_seals, F_SEAL_WRITE, "F_SEAL_WRITE");
|
||||
seal_yet = p_memfd_seal(seal_yet, mfd.mfd_seals, F_SEAL_FUTURE_WRITE, "F_SEAL_FUTURE_WRITE");
|
||||
}
|
||||
|
||||
oprint(f, "\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
pmisc(struct file *f, const char *name)
|
||||
{
|
||||
|
@ -310,6 +349,8 @@ pmisc(struct file *f, const char *name)
|
|||
case NL_PAD:
|
||||
printf("* pad %p", f->f_data);
|
||||
break;
|
||||
case NL_MEMFD:
|
||||
return p_memfd(f);
|
||||
case NL_MAX:
|
||||
printf("* %s ops=%p %p", name, f->f_ops, f->f_data);
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue