Add memfd_create(2) from GSoC 2023 by Theodore Preduta

This commit is contained in:
christos 2023-07-10 02:31:54 +00:00
parent 8d0cdbc342
commit 7eace3da0c
19 changed files with 1041 additions and 90 deletions

View File

@ -1,4 +1,4 @@
# $NetBSD: mi,v 1.2436 2023/07/08 17:43:13 christos Exp $
# $NetBSD: mi,v 1.2437 2023/07/10 02:31:54 christos Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
./etc/mtree/set.comp comp-sys-root
@ -4826,6 +4826,7 @@
./usr/share/man/cat2/lutimes.0 comp-c-catman .cat
./usr/share/man/cat2/m68k_sync_icache.0 comp-c-catman .cat
./usr/share/man/cat2/madvise.0 comp-c-catman .cat
./usr/share/man/cat2/memfd_create.0 comp-c-catman .cat
./usr/share/man/cat2/mincore.0 comp-c-catman .cat
./usr/share/man/cat2/minherit.0 comp-c-catman .cat
./usr/share/man/cat2/mkdir.0 comp-c-catman .cat
@ -13315,6 +13316,7 @@
./usr/share/man/html2/lutimes.html comp-c-htmlman html
./usr/share/man/html2/m68k_sync_icache.html comp-c-htmlman html
./usr/share/man/html2/madvise.html comp-c-htmlman html
./usr/share/man/html2/memfd_create.html comp-c-htmlman html
./usr/share/man/html2/mincore.html comp-c-htmlman html
./usr/share/man/html2/minherit.html comp-c-htmlman html
./usr/share/man/html2/mkdir.html comp-c-htmlman html
@ -21632,6 +21634,7 @@
./usr/share/man/man2/lutimes.2 comp-c-man .man
./usr/share/man/man2/m68k_sync_icache.2 comp-c-man .man
./usr/share/man/man2/madvise.2 comp-c-man .man
./usr/share/man/man2/memfd_create.2 comp-c-man .man
./usr/share/man/man2/mincore.2 comp-c-man .man
./usr/share/man/man2/minherit.2 comp-c-man .man
./usr/share/man/man2/mkdir.2 comp-c-man .man

View File

@ -1,4 +1,4 @@
# $NetBSD: Makefile.inc,v 1.250 2021/11/01 05:53:45 thorpej Exp $
# $NetBSD: Makefile.inc,v 1.251 2023/07/10 02:31:54 christos Exp $
# @(#)Makefile.inc 8.3 (Berkeley) 10/24/94
# sys sources
@ -134,7 +134,8 @@ ASM=\
_lwp_unpark_all.S _lwp_suspend.S _lwp_continue.S \
_lwp_wakeup.S _lwp_detach.S _lwp_setprivate.S \
_lwp_setname.S _lwp_getname.S _lwp_ctl.S \
madvise.S mincore.S minherit.S mkdir.S mkdirat.S mkfifo.S mkfifoat.S \
madvise.S memfd_create.S mincore.S minherit.S mkdir.S mkdirat.S \
mkfifo.S mkfifoat.S \
__mknod50.S mlock.S mlockall.S modctl.S __mount50.S \
mprotect.S __msgctl50.S msgget.S munlock.S munlockall.S \
munmap.S \
@ -275,7 +276,7 @@ MAN+= accept.2 access.2 acct.2 adjtime.2 bind.2 brk.2 chdir.2 \
_lwp_suspend.2 _lwp_wakeup.2 _lwp_wait.2 _lwp_kill.2 \
_lwp_getname.2 _lwp_getprivate.2 \
_lwp_park.2 _lwp_unpark.2 _lwp_unpark_all.2 \
mkdir.2 mkfifo.2 mknod.2 \
memfd_create.2 mkdir.2 mkfifo.2 mknod.2 \
madvise.2 mincore.2 minherit.2 mlock.2 mlockall.2 mmap.2 modctl.2 \
mount.2 \
mprotect.2 mremap.2 msgctl.2 msgget.2 msgrcv.2 msgsnd.2 msync.2 \

View File

@ -1,4 +1,4 @@
.\" $NetBSD: fcntl.2,v 1.49 2022/12/04 19:01:19 uwe Exp $
.\" $NetBSD: fcntl.2,v 1.50 2023/07/10 02:31:54 christos Exp $
.\"
.\" Copyright (c) 1983, 1993
.\" The Regents of the University of California. All rights reserved.
@ -29,7 +29,7 @@
.\"
.\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94
.\"
.Dd September 26, 2019
.Dd July 5, 2023
.Dt FCNTL 2
.Os
.Sh NAME
@ -162,6 +162,24 @@ in the buffer pointed to by
.Fa arg
should be pointing to a buffer of at least
.Dv MAXPATHLEN .
.It Dv F_ADD_SEALS
Add seals specified in
.Fa arg
to
.Fa fd
to restrict possible operations on
.Fa fd
as described below.
Like flags, multiple seals can be specified at once.
Additionally, specifying seals that are already associated with
.Fa fd
is a no-op.
.It Dv F_GET_SEALS
Get the seals currently associated with
.Fa fd
as described below
.Fa ( arg
is ignored).
.El
.Pp
The set of valid flags for the
@ -324,13 +342,44 @@ or an
request fails or blocks respectively when another process has existing
locks on bytes in the specified region and the type of any of those
locks conflicts with the type specified in the request.
.Pp
Possible seals are:
.Bl -tag -width F_SEAL_FUTURE_WRITE
.It Dv F_SEAL_SEAL
Prevent any further seals from being added to
.Fa fd .
.It Dv F_SEAL_SHRINK
Prevent the size of
.Fa fd
from decreasing.
.It Dv F_SEAL_GROW
Prevent the size of
.Fa fd
from increasing.
.It Dv F_SEAL_WRITE
Prevent any write operations to
.Fa fd .
.Dv F_SEAL_WRITE
cannot be applied if
.Fa fd
has any memory mappings.
.It Dv F_SEAL_FUTURE_WRITE
Like
.Dv F_SEAL_WRITE
but allow any current memory mappings of
.Fa fd
to remain open, including those with
.Dv PROT_WRITE .
.El
.Sh NOTES
The
.Dv F_GETPATH
functionality is implemented using the reverse
For
.Dv F_GETPATH :
.Bl -bullet -compact
.It
For vnodes, functionality is implemented using the reverse
.Xr namei 9
cache.
The implications of this are:
The implications of this are
.Bl -bullet -compact
.It
For hard links where the file descriptor can resolve to multiple pathnames,
@ -341,16 +390,25 @@ may fail if the corresponding entry has been evicted from the LRU
.Xr namei 9
cache and return
.Er ENOENT .
.El
.It
File descriptors that don't point to vnodes are not handled, as
well as symbolic links since there is currently no way to obtain
a file descriptor pointing to a symbolic link.
For a file descriptor created by
.Xr memfd_create 2 ,
the name provided at
.Fa fd
creation, with the prefix
.Dq memfd:
is used.
.It
Other types of file descriptors are not handled, as well as symbolic
links since there is currently no way to obtain a file descriptor
pointing to a symbolic link.
.El
.Sh RETURN VALUES
Upon successful completion, the value returned depends on
.Fa cmd
as follows:
.Bl -tag -width F_GETOWNX -offset indent
.Bl -tag -width F_GET_SEALS -offset indent
.It Dv F_DUPFD
A new file descriptor.
.It Dv F_GETFD
@ -361,6 +419,9 @@ Value of flags.
Value of file descriptor owner.
.It Dv F_MAXFD
Value of the highest file descriptor open by the process.
.It Dv F_GET_SEALS
Value of the seals currently associated with
.Fa fd .
.It other
Value other than \-1.
.El
@ -473,6 +534,18 @@ is an exclusive lock
and
.Fa fildes
is not a valid file descriptor open for writing.
.It Bq Er EBUSY
The argument
.Fa cmd
is
.Dv F_ADD_SEALS ,
.Fa arg
contains
.Dv F_SEAL_WRITE
and
.Fa fd
is currently mapped by
.Xr mmap 2 .
.It Bq Er EDEADLK
The argument
.Fa cmd
@ -512,6 +585,24 @@ and the data to which
points is not valid, or
.Fa fildes
refers to a file that does not support locking.
.Pp
The argument
.Fa cmd
is
.Dv F_ADD_SEALS
or
.Dv F_GET_SEALS
and
.Fa fd
does not support seals.
.Pp
The argument
.Fa cmd
is
.Dv F_ADD_SEALS
and
.Fa arg
contains set bits for unsupported seals.
.It Bq Er EMFILE
The argument
.Fa cmd
@ -562,6 +653,15 @@ has been reached.
It can be modified using the
.Li kern.maxfiles
.Xr sysctl 7 .
.It Bq Er EPERM
The argument
.Fa cmd
is
.Dv F_ADD_SEALS
and
.Fa fd
already has
.Dv F_SEAL_SEAL .
.It Bq Er ERANGE
The argument
.Fa cmd

125
lib/libc/sys/memfd_create.2 Normal file
View File

@ -0,0 +1,125 @@
.\" $NetBSD: memfd_create.2,v 1.1 2023/07/10 02:31:54 christos Exp $
.\"
.\" Copyright (c) 2023 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" This code is derived from software contributed to The NetBSD Foundation
.\" by Theodore Preduta.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd July 5, 2023
.Dt MEMFD_CREATE 2
.Os
.Sh NAME
.Nm memfd_create
.Nd create anonymous files
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In sys/mman.h
.Ft int
.Fn memfd_create "const char *name" "unsigned int flags"
.Sh DESCRIPTION
The
.Fn memfd_create
system call returns a file descriptor to a file named
.Fa name
backed only by RAM.
Initially, the size of the file is zero.
.Pp
The length of
.Fa name
must not exceed
.Dv NAME_MAX-6
characters, to allow for the prefix
.Dq memfd:
to be added.
Because the file is not stored on disk,
.Fa name
does not have to be unique.
.Fa name
is only intended to be used for debugging purposes and commands like
.Xr fstat 1 .
.Pp
Additionally, any of the following may be specified as the
.Fa flags :
.Bl -tag -width MFD_ALLOW_SEALING
.It Dv MFD_CLOEXEC
Set the
.Xr close 2
on
.Xr exec 3
flag.
.It Dv MFD_ALLOW_SEALING
Allow adding seals to the file descriptor using the
.Xr fcntl 2
.Dv F_ADD_SEALS
command.
.El
.Pp
Otherwise, the returned file descriptor behaves the same as a regular file,
including the ability to be mapped by
.Xr mmap 2 .
.Sh RETURN VALUES
If successful, the
.Fn memfd_create
system call returns a non-negative integer.
On failure, \-1 is returned and
.Fa errno
is set to indicate the error.
.Sh ERRORS
.Fn memfd_create
will fail if:
.Bl -tag -width Er
.It Bq Er EFAULT
The argument
.Fa name
is
.Dv NULL
or points to invalid memory.
.It Bq Er EINVAL
The argument
.Fa flags
has any bits set other than
.Dv MFD_CLOEXEC
or
.Dv MFD_ALLOW_SEALING .
.It Bq Er ENAMETOOLONG
The length of
.Fa name
appended with the prefix
.Dq memfd:
would exceed
.Dv NAME_MAX .
.It Bq Er ENFILE
The system file table is full.
.El
.Sh SEE ALSO
.Xr fcntl 2 ,
.Xr mmap 2 ,
.Xr shmget 2 ,
.Xr shm_open 3
.Sh HISTORY
.Fn memfd_create
is compatible with the Linux system call of the same name that first appeared in
Linux 3.17.

View File

@ -1,4 +1,4 @@
$NetBSD: syscalls.master,v 1.67 2021/12/02 04:29:48 ryo Exp $
$NetBSD: syscalls.master,v 1.68 2023/07/10 02:31:55 christos Exp $
; @(#)syscalls.master 8.1 (Berkeley) 7/19/93
@ -567,6 +567,145 @@
311 UNIMPL process_vm_writev
312 UNIMPL kcmp
313 UNIMPL finit_module
314 UNIMPL sched_setattr
315 UNIMPL sched_getattr
316 UNIMPL renameat2
317 UNIMPL seccomp
318 NOARGS { ssize_t|sys||getrandom(void *buf, size_t buflen, \
unsigned int flags); }
319 STD { int|linux_sys||memfd_create(const char *name, \
unsigned int flags); }
320 UNIMPL kexec_file_load
321 UNIMPL bpf
322 UNIMPL execveat
323 UNIMPL userfaultfd
324 UNIMPL membarrier
325 UNIMPL mlock2
326 UNIMPL copy_file_range
327 UNIMPL preadv2
328 UNIMPL pwritev2
329 UNIMPL pkey_mprotect
330 UNIMPL pkey_alloc
331 UNIMPL pkey_free
332 UNIMPL statx
333 UNIMPL io_pgetevents
334 UNIMPL rseq
335 UNIMPL
336 UNIMPL
337 UNIMPL
338 UNIMPL
339 UNIMPL
340 UNIMPL
341 UNIMPL
342 UNIMPL
343 UNIMPL
344 UNIMPL
345 UNIMPL
346 UNIMPL
347 UNIMPL
348 UNIMPL
349 UNIMPL
350 UNIMPL
351 UNIMPL
352 UNIMPL
353 UNIMPL
354 UNIMPL
355 UNIMPL
356 UNIMPL
357 UNIMPL
358 UNIMPL
359 UNIMPL
360 UNIMPL
361 UNIMPL
362 UNIMPL
363 UNIMPL
364 UNIMPL
365 UNIMPL
366 UNIMPL
367 UNIMPL
368 UNIMPL
369 UNIMPL
370 UNIMPL
371 UNIMPL
372 UNIMPL
373 UNIMPL
374 UNIMPL
375 UNIMPL
376 UNIMPL
377 UNIMPL
378 UNIMPL
379 UNIMPL
380 UNIMPL
381 UNIMPL
382 UNIMPL
383 UNIMPL
384 UNIMPL
385 UNIMPL
386 UNIMPL
387 UNIMPL
388 UNIMPL
389 UNIMPL
390 UNIMPL
391 UNIMPL
392 UNIMPL
393 UNIMPL
394 UNIMPL
395 UNIMPL
396 UNIMPL
397 UNIMPL
398 UNIMPL
399 UNIMPL
400 UNIMPL
401 UNIMPL
402 UNIMPL
403 UNIMPL
404 UNIMPL
405 UNIMPL
406 UNIMPL
407 UNIMPL
408 UNIMPL
409 UNIMPL
410 UNIMPL
411 UNIMPL
412 UNIMPL
413 UNIMPL
414 UNIMPL
415 UNIMPL
416 UNIMPL
417 UNIMPL
418 UNIMPL
419 UNIMPL
420 UNIMPL
421 UNIMPL
422 UNIMPL
423 UNIMPL
424 UNIMPL pidfd_send_signal
425 UNIMPL io_uring_setup
426 UNIMPL io_uring_enter
427 UNIMPL io_uring_register
428 UNIMPL open_tree
429 UNIMPL move_mount
430 UNIMPL fsopen
431 UNIMPL fsconfig
432 UNIMPL fsmount
433 UNIMPL fspick
434 UNIMPL pidfd_open
435 UNIMPL clone3
436 UNIMPL close_range
437 UNIMPL openat2
438 UNIMPL pidfd_getfd
439 UNIMPL faccessat2
440 UNIMPL process_madvise
441 UNIMPL epoll_pwait2
442 UNIMPL mount_setattr
443 UNIMPL quotactl_fd
444 UNIMPL landlock_create_ruleset
445 UNIMPL landlock_add_rule
446 UNIMPL landlock_restrict_self
447 UNIMPL memfd_secret
448 UNIMPL process_mrelease
449 UNIMPL futex_waitv
450 UNIMPL set_mempolicy_home_node
; we want a "nosys" syscall, we'll just add an extra entry for it.
314 STD { int|linux_sys||nosys(void); }
451 STD { int|linux_sys||nosys(void); }

View File

@ -1,4 +1,4 @@
/* $NetBSD: linux_fcntl.h,v 1.20 2021/11/25 02:27:08 ryo Exp $ */
/* $NetBSD: linux_fcntl.h,v 1.21 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 1995, 1998 The NetBSD Foundation, Inc.
@ -100,6 +100,8 @@ struct linux_flock64 {
#define LINUX_F_DUPFD_CLOEXEC (LINUX_F_SPECIFIC_BASE + 6)
#define LINUX_F_SETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 7)
#define LINUX_F_GETPIPE_SZ (LINUX_F_SPECIFIC_BASE + 8)
#define LINUX_F_ADD_SEALS (LINUX_F_SPECIFIC_BASE + 9)
#define LINUX_F_GET_SEALS (LINUX_F_SPECIFIC_BASE + 10)
/*
* We have to have 4 copies of the code that converts linux fcntl() file

View File

@ -1,4 +1,4 @@
/* $NetBSD: linux_file.c,v 1.122 2021/11/25 03:08:04 ryo Exp $ */
/* $NetBSD: linux_file.c,v 1.123 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 1995, 1998, 2008 The NetBSD Foundation, Inc.
@ -35,7 +35,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.122 2021/11/25 03:08:04 ryo Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux_file.c,v 1.123 2023/07/10 02:31:55 christos Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -440,6 +440,14 @@ linux_sys_fcntl(struct lwp *l, const struct linux_sys_fcntl_args *uap, register_
cmd = F_DUPFD_CLOEXEC;
break;
case LINUX_F_ADD_SEALS:
cmd = F_ADD_SEALS;
break;
case LINUX_F_GET_SEALS:
cmd = F_GET_SEALS;
break;
default:
return EOPNOTSUPP;
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: linux_misc.c,v 1.256 2021/12/02 04:29:48 ryo Exp $ */
/* $NetBSD: linux_misc.c,v 1.257 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 1995, 1998, 1999, 2008 The NetBSD Foundation, Inc.
@ -57,7 +57,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_misc.c,v 1.256 2021/12/02 04:29:48 ryo Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux_misc.c,v 1.257 2023/07/10 02:31:55 christos Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -169,9 +169,9 @@ const struct linux_mnttypes linux_fstypes[] = {
const int linux_fstypes_cnt = sizeof(linux_fstypes) / sizeof(linux_fstypes[0]);
# ifdef DEBUG_LINUX
#define DPRINTF(a) uprintf a
#define DPRINTF(a) uprintf a
# else
#define DPRINTF(a)
#define DPRINTF(a)
# endif
/* Local linux_misc.c functions: */
@ -1681,3 +1681,66 @@ linux_sys_eventfd2(struct lwp *l, const struct linux_sys_eventfd2_args *uap,
return linux_do_eventfd2(l, SCARG(uap, initval), SCARG(uap, flags),
retval);
}
/*
 * Linux memfd_create(2) flag bits as seen by the Linux emulation layer.
 * All masks are unsigned so they combine cleanly with the
 * "unsigned int flags" syscall argument.
 */
#define LINUX_MFD_CLOEXEC 0x0001U
#define LINUX_MFD_ALLOW_SEALING 0x0002U
#define LINUX_MFD_HUGETLB 0x0004U
#define LINUX_MFD_NOEXEC_SEAL 0x0008U
#define LINUX_MFD_EXEC 0x0010U
/*
 * Huge page size selector bits.  The literal must be unsigned:
 * shifting the signed int 0x3f left by 26 overflows int, which is
 * undefined behaviour.
 */
#define LINUX_MFD_HUGE_FLAGS (0x3fU << 26)
/* Every flag bit that is accepted without failing with EINVAL. */
#define LINUX_MFD_ALL_FLAGS (LINUX_MFD_CLOEXEC|LINUX_MFD_ALLOW_SEALING \
|LINUX_MFD_HUGETLB|LINUX_MFD_NOEXEC_SEAL \
|LINUX_MFD_EXEC|LINUX_MFD_HUGE_FLAGS)
/* The subset of flags that is actually passed on to the native call. */
#define LINUX_MFD_KNOWN_FLAGS (LINUX_MFD_CLOEXEC|LINUX_MFD_ALLOW_SEALING)
/* Maximum name length (excluding the terminating NUL) accepted from Linux. */
#define LINUX_MFD_NAME_MAX 249
/*
 * Linux memfd_create(2) entry point.
 *
 * Validates the flag combination and the name length against the Linux
 * limits, then hands the request to the native sys_memfd_create() with
 * only the flags in LINUX_MFD_KNOWN_FLAGS.  Other accepted flags are
 * dropped (and reported via DPRINTF when that macro is enabled).
 *
 * Returns 0 on success, EINVAL for a bad flag combination or a name
 * longer than LINUX_MFD_NAME_MAX, or whatever copyinstr() /
 * sys_memfd_create() report.
 */
int
linux_sys_memfd_create(struct lwp *l,
    const struct linux_sys_memfd_create_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *) name;
		syscallarg(unsigned int) flags;
	} */
	const unsigned int lflags = SCARG(uap, flags);
	const unsigned int ignored = lflags & ~LINUX_MFD_KNOWN_FLAGS;
	struct sys_memfd_create_args muap;
	char *namebuf;
	int error;

	KASSERT(LINUX_MFD_NAME_MAX < NAME_MAX); /* sanity check */

	/* Reject bits Linux itself does not define. */
	if ((lflags & ~LINUX_MFD_ALL_FLAGS) != 0)
		return EINVAL;

	if ((lflags & LINUX_MFD_HUGETLB) == 0) {
		/* A huge page size selector needs MFD_HUGETLB. */
		if ((lflags & LINUX_MFD_HUGE_FLAGS) != 0)
			return EINVAL;
	} else if ((lflags & LINUX_MFD_ALLOW_SEALING) != 0) {
		/* MFD_HUGETLB cannot be combined with sealing here. */
		return EINVAL;
	}

	/*
	 * Linux has a stricter limit for name size: copy the name into
	 * a scratch buffer purely to learn whether it fits.
	 */
	namebuf = PNBUF_GET();
	error = copyinstr(SCARG(uap, name), namebuf, LINUX_MFD_NAME_MAX+1,
	    NULL);
	PNBUF_PUT(namebuf);

	if (error == ENAMETOOLONG)
		return EINVAL;
	if (error != 0)
		return error;

	if (ignored != 0) {
		DPRINTF(("linux_sys_memfd_create: ignored flags %x\n",
		    ignored));
	}

	SCARG(&muap, name) = SCARG(uap, name);
	SCARG(&muap, flags) = lflags & LINUX_MFD_KNOWN_FLAGS;

	return sys_memfd_create(l, &muap, retval);
}

View File

@ -1,4 +1,4 @@
/* $NetBSD: linux_sysctl.c,v 1.47 2021/09/23 06:56:27 ryo Exp $ */
/* $NetBSD: linux_sysctl.c,v 1.48 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 2003, 2008 The NetBSD Foundation, Inc.
@ -34,7 +34,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_sysctl.c,v 1.47 2021/09/23 06:56:27 ryo Exp $");
__KERNEL_RCSID(0, "$NetBSD: linux_sysctl.c,v 1.48 2023/07/10 02:31:55 christos Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -57,8 +57,8 @@ __KERNEL_RCSID(0, "$NetBSD: linux_sysctl.c,v 1.47 2021/09/23 06:56:27 ryo Exp $"
#include <compat/linux/common/linux_machdep.h>
char linux_sysname[128] = "Linux";
char linux_release[128] = "3.11.6";
char linux_version[128] = "#1 SMP PREEMPT Thu Oct 24 16:23:02 UTC 2013";
char linux_release[128] = "6.3.10";
char linux_version[128] = "#1 SMP PREEMPT_DYNAMIC Wed Jun 28 18:34:30 UTC 2023";
struct sysctlnode linux_sysctl_root = {
.sysctl_flags = SYSCTL_VERSION|

View File

@ -1,4 +1,4 @@
/* $NetBSD: sys_descrip.c,v 1.47 2023/05/14 09:29:58 riastradh Exp $ */
/* $NetBSD: sys_descrip.c,v 1.48 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 2008, 2020 The NetBSD Foundation, Inc.
@ -67,7 +67,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.47 2023/05/14 09:29:58 riastradh Exp $");
__KERNEL_RCSID(0, "$NetBSD: sys_descrip.c,v 1.48 2023/07/10 02:31:55 christos Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@ -315,26 +315,6 @@ out: if (fp)
return error;
}
static int
do_fcntl_getpath(struct lwp *l, file_t *fp, char *upath)
{
char *kpath;
int error;
if (fp->f_type != DTYPE_VNODE)
return EOPNOTSUPP;
kpath = PNBUF_GET();
error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode, l, l->l_proc);
if (!error)
error = copyoutstr(kpath, upath, MAXPATHLEN, NULL);
PNBUF_PUT(kpath);
return error;
}
/*
* The file control system call.
*/
@ -350,6 +330,7 @@ sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
filedesc_t *fdp;
fdtab_t *dt;
file_t *fp;
char *kpath;
struct flock fl;
bool cloexec = false;
@ -486,7 +467,30 @@ sys_fcntl(struct lwp *l, const struct sys_fcntl_args *uap, register_t *retval)
break;
case F_GETPATH:
error = do_fcntl_getpath(l, fp, SCARG(uap, arg));
kpath = PNBUF_GET();
/* vnodes need extra context, so are handled separately */
if (fp->f_type == DTYPE_VNODE)
error = vnode_to_path(kpath, MAXPATHLEN, fp->f_vnode,
l, l->l_proc);
else
error = (*fp->f_ops->fo_fcntl)(fp, F_GETPATH, kpath);
if (error == 0)
error = copyoutstr(kpath, SCARG(uap, arg), MAXPATHLEN,
NULL);
PNBUF_PUT(kpath);
break;
case F_ADD_SEALS:
tmp = (int)(uintptr_t) SCARG(uap, arg);
error = (*fp->f_ops->fo_fcntl)(fp, F_ADD_SEALS, &tmp);
break;
case F_GET_SEALS:
error = (*fp->f_ops->fo_fcntl)(fp, F_GET_SEALS, &tmp);
*retval = tmp;
break;
default:

408
sys/kern/sys_memfd.c Normal file
View File

@ -0,0 +1,408 @@
/* $NetBSD: sys_memfd.c,v 1.1 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 2023 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Theodore Preduta.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: sys_memfd.c,v 1.1 2023/07/10 02:31:55 christos Exp $");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/mman.h>
#include <sys/syscallargs.h>
#include <uvm/uvm_extern.h>
#include <uvm/uvm_object.h>
#define F_SEAL_ANY_WRITE (F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
#define MFD_KNOWN_SEALS (F_SEAL_SEAL|F_SEAL_SHRINK|F_SEAL_GROW \
|F_SEAL_WRITE|F_SEAL_FUTURE_WRITE)
static const char memfd_prefix[] = "memfd:";
static int memfd_read(file_t *, off_t *, struct uio *, kauth_cred_t, int);
static int memfd_write(file_t *, off_t *, struct uio *, kauth_cred_t, int);
static int memfd_ioctl(file_t *, u_long, void *);
static int memfd_fcntl(file_t *, u_int, void *);
static int memfd_stat(file_t *, struct stat *);
static int memfd_close(file_t *);
static int memfd_mmap(file_t *, off_t *, size_t, int, int *, int *,
struct uvm_object **, int *);
static int memfd_seek(file_t *, off_t, int, off_t *, int);
static int memfd_truncate(file_t *, off_t);
/*
 * File operations vector for memfd descriptors; sys_memfd_create()
 * installs it as fp->f_ops on every file it creates.
 */
static const struct fileops memfd_fileops = {
.fo_name = "memfd",
.fo_read = memfd_read,
.fo_write = memfd_write,
.fo_ioctl = memfd_ioctl,
.fo_fcntl = memfd_fcntl,
/* poll, kqfilter and restart use the generic fnullop_* handlers. */
.fo_poll = fnullop_poll,
.fo_stat = memfd_stat,
.fo_close = memfd_close,
.fo_kqfilter = fnullop_kqfilter,
.fo_restart = fnullop_restart,
.fo_mmap = memfd_mmap,
.fo_seek = memfd_seek,
/*
 * NOTE(review): eopnotsupp is cast to fit these slots; presumably
 * both operations simply fail with EOPNOTSUPP -- confirm.
 */
.fo_fpathconf = (void *)eopnotsupp,
.fo_posix_fadvise = (void *)eopnotsupp,
.fo_truncate = memfd_truncate,
};
/*
 * memfd_create(2).  Create a file descriptor associated with anonymous
 * memory.
 *
 * The user-supplied name is stored in mfd_name behind the "memfd:"
 * prefix.  Unless MFD_ALLOW_SEALING is given, the file starts out with
 * F_SEAL_SEAL set so that no seals can ever be added.
 *
 * Returns 0 and stores the new descriptor in *retval on success.
 * Fails with EINVAL for unknown flag bits, or with the error reported
 * by copyinstr() or fd_allocfile().
 */
int
sys_memfd_create(struct lwp *l, const struct sys_memfd_create_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(const char *) name;
		syscallarg(unsigned int) flags;
	} */
	int error, fd;
	file_t *fp;
	struct memfd *mfd;
	struct proc *p = l->l_proc;
	const unsigned int flags = SCARG(uap, flags);

	/*
	 * Sanity check: the prefix must leave room for a name.  Written
	 * as a comparison because the subtraction is done in an unsigned
	 * type and would wrap instead of going negative.
	 */
	KASSERT(sizeof(memfd_prefix) < NAME_MAX);

	if (flags & ~(MFD_CLOEXEC|MFD_ALLOW_SEALING))
		return EINVAL;

	mfd = kmem_zalloc(sizeof(*mfd), KM_SLEEP);
	mfd->mfd_size = 0;
	mfd->mfd_uobj = uao_create(INT64_MAX - PAGE_SIZE, 0); /* same as tmpfs */
	mutex_init(&mfd->mfd_lock, MUTEX_DEFAULT, IPL_NONE);

	/* Build "memfd:<name>" in place. */
	strcpy(mfd->mfd_name, memfd_prefix);
	error = copyinstr(SCARG(uap, name),
	    &mfd->mfd_name[sizeof(memfd_prefix) - 1],
	    sizeof(mfd->mfd_name) - sizeof(memfd_prefix), NULL);
	if (error != 0)
		goto leave;

	getnanotime(&mfd->mfd_btime);

	/* Without MFD_ALLOW_SEALING the seal set is frozen from the start. */
	if ((flags & MFD_ALLOW_SEALING) == 0)
		mfd->mfd_seals |= F_SEAL_SEAL;

	error = fd_allocfile(&fp, &fd);
	if (error != 0)
		goto leave;

	fp->f_flag = FREAD|FWRITE;
	fp->f_type = DTYPE_MEMFD;
	fp->f_ops = &memfd_fileops;
	fp->f_memfd = mfd;
	fd_set_exclose(l, fd, (flags & MFD_CLOEXEC) != 0);
	fd_affix(p, fp, fd);

	*retval = fd;
	return 0;

leave:
	/*
	 * Undo everything set up above.  The mutex was initialised before
	 * any jump to this label, so it must be destroyed before the
	 * memory that holds it is freed.
	 */
	uao_detach(mfd->mfd_uobj);
	mutex_destroy(&mfd->mfd_lock);
	kmem_free(mfd, sizeof(*mfd));
	return error;
}
/*
 * fo_read for memfd: transfer up to uio_resid bytes starting at *offp
 * from the backing uvm object, never reading past mfd_size.
 *
 * When offp is the file's own offset, fp->f_lock is held across the
 * operation.  If FOF_UPDATE_OFFSET is set in flags, *offp is advanced
 * past the bytes that were transferred.
 *
 * Returns 0 on success (including a read at or past end of file, which
 * transfers nothing), EINVAL for a negative offset, or the error from
 * ubc_uiomove().
 */
static int
memfd_read(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	int error;
	vsize_t todo;
	struct memfd *mfd = fp->f_memfd;

	if (offp == &fp->f_offset)
		mutex_enter(&fp->f_lock);

	if (*offp < 0) {
		error = EINVAL;
		goto leave;
	}

	/* Trying to read past the end does nothing. */
	if (*offp >= mfd->mfd_size) {
		error = 0;
		goto leave;
	}

	uio->uio_offset = *offp;
	todo = MIN(uio->uio_resid, mfd->mfd_size - *offp);
	error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL,
	    UBC_READ|UBC_PARTIALOK);

	/*
	 * The uio offset now reflects what was actually moved; publish it
	 * so that sequential read(2) calls make progress.
	 */
	if (flags & FOF_UPDATE_OFFSET)
		*offp = uio->uio_offset;

leave:
	if (offp == &fp->f_offset)
		mutex_exit(&fp->f_lock);

	getnanotime(&mfd->mfd_atime);

	return error;
}
/*
 * fo_write for memfd: transfer uio_resid bytes starting at *offp into
 * the backing uvm object.
 *
 * Any write seal makes the call fail with EPERM.  With F_SEAL_GROW set,
 * a write starting at or past end of file fails with EPERM and a write
 * that would cross end of file is shortened to fit; otherwise the file
 * is grown first via memfd_truncate().
 *
 * When offp is the file's own offset, fp->f_lock is held across the
 * operation.  If FOF_UPDATE_OFFSET is set in flags, *offp is advanced
 * past the bytes that were transferred.
 *
 * Returns 0 on success, EINVAL for a negative offset, EPERM as
 * described above, or the error from memfd_truncate() / ubc_uiomove().
 */
static int
memfd_write(file_t *fp, off_t *offp, struct uio *uio, kauth_cred_t cred,
    int flags)
{
	int error;
	vsize_t todo;
	struct memfd *mfd = fp->f_memfd;

	if (mfd->mfd_seals & F_SEAL_ANY_WRITE)
		return EPERM;

	if (offp == &fp->f_offset)
		mutex_enter(&fp->f_lock);

	if (*offp < 0) {
		error = EINVAL;
		goto leave;
	}

	uio->uio_offset = *offp;
	todo = uio->uio_resid;

	if (mfd->mfd_seals & F_SEAL_GROW) {
		if (*offp >= mfd->mfd_size) {
			error = EPERM;
			goto leave;
		}

		/* Truncate the write to fit in mfd_size */
		if (*offp + uio->uio_resid >= mfd->mfd_size)
			todo = mfd->mfd_size - *offp;
	} else if (*offp + uio->uio_resid >= mfd->mfd_size) {
		/* Grow to accommodate the write request. */
		error = memfd_truncate(fp, *offp + uio->uio_resid);
		if (error != 0)
			goto leave;
	}

	error = ubc_uiomove(mfd->mfd_uobj, uio, todo, UVM_ADV_SEQUENTIAL,
	    UBC_WRITE|UBC_PARTIALOK);

	/*
	 * The uio offset now reflects what was actually moved; publish it
	 * so that sequential write(2) calls append instead of overwriting.
	 */
	if (flags & FOF_UPDATE_OFFSET)
		*offp = uio->uio_offset;

	getnanotime(&mfd->mfd_mtime);

leave:
	if (offp == &fp->f_offset)
		mutex_exit(&fp->f_lock);

	return error;
}
/*
 * fo_ioctl for memfd: no ioctl commands are implemented, so every
 * request fails with EINVAL regardless of cmd and data.
 */
static int
memfd_ioctl(file_t *fp, u_long cmd, void *data)
{
return EINVAL;
}
/*
 * fo_fcntl for memfd.
 *
 * F_GETPATH:   copy the stored name ("memfd:" prefix included) into the
 *              kernel buffer at data.  The caller is expected to supply
 *              a buffer of at least MAXPATHLEN bytes.
 * F_ADD_SEALS: data points to an int holding the seals to add.  Fails
 *              with EPERM once F_SEAL_SEAL is set, EINVAL for unknown
 *              seal bits, and EBUSY when F_SEAL_WRITE is newly requested
 *              while the backing object has extra references.
 * F_GET_SEALS: store the current seal set in the int at data.
 *
 * Any other command fails with EINVAL.
 */
static int
memfd_fcntl(file_t *fp, u_int cmd, void *data)
{
	struct memfd *mfd = fp->f_memfd;

	switch (cmd) {
	case F_GETPATH:
		/* Always NUL-terminated, never more than MAXPATHLEN bytes. */
		strlcpy(data, mfd->mfd_name, MAXPATHLEN);
		return 0;

	case F_ADD_SEALS:
		if (mfd->mfd_seals & F_SEAL_SEAL)
			return EPERM;

		if (*(int *)data & ~MFD_KNOWN_SEALS)
			return EINVAL;

		/*
		 * Can only add F_SEAL_WRITE if there are no currently
		 * open mmaps.
		 *
		 * XXX should only disallow if there are currently
		 * open mmaps with PROT_WRITE.
		 */
		if ((mfd->mfd_seals & F_SEAL_WRITE) == 0 &&
		    (*(int *)data & F_SEAL_WRITE) != 0 &&
		    mfd->mfd_uobj->uo_refs > 1)
			return EBUSY;

		mfd->mfd_seals |= *(int *)data;
		return 0;

	case F_GET_SEALS:
		*(int *)data = mfd->mfd_seals;
		return 0;

	default:
		return EINVAL;
	}
}
/*
 * fo_stat for memfd: fill *st from the memfd state.
 *
 * Ownership comes from the credentials the file was opened with, the
 * size is mfd_size, and the mode is owner-read plus owner-write unless
 * a write seal is in place.  ctime mirrors mtime.  All remaining fields
 * are zero.  Always returns 0.
 */
static int
memfd_stat(file_t *fp, struct stat *st)
{
	struct memfd *mfd = fp->f_memfd;
	const int write_sealed = (mfd->mfd_seals & F_SEAL_ANY_WRITE) != 0;

	memset(st, 0, sizeof(*st));

	/* Identity and size. */
	st->st_size = mfd->mfd_size;
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	st->st_uid = kauth_cred_geteuid(fp->f_cred);

	/* Permissions: write access disappears once write-sealed. */
	st->st_mode = write_sealed ? S_IREAD : (S_IREAD | S_IWRITE);

	/* Timestamps. */
	st->st_atimespec = mfd->mfd_atime;
	st->st_mtimespec = mfd->mfd_mtime;
	st->st_ctimespec = mfd->mfd_mtime;
	st->st_birthtimespec = mfd->mfd_btime;

	return 0;
}
/*
 * fo_close for memfd: drop the file's reference on the backing uvm
 * object, destroy the memfd lock, free the memfd and clear the file's
 * pointer to it.  Always returns 0.
 */
static int
memfd_close(file_t *fp)
{
	struct memfd *const dead = fp->f_memfd;

	/* Detach the state from the file, then tear it down. */
	fp->f_memfd = NULL;

	uao_detach(dead->mfd_uobj);
	mutex_destroy(&dead->mfd_lock);
	kmem_free(dead, sizeof(*dead));

	return 0;
}
/*
 * fo_mmap for memfd: hand the backing uvm object to the caller.
 *
 * Fails with EINVAL when the requested range starts before 0 or ends
 * past mfd_size, and with EPERM for a shared writable mapping of a
 * write-sealed file.  On success an extra reference is taken on the
 * object, *uobjp points to it, *maxprotp is set to prot and *advicep
 * to UVM_ADV_RANDOM.
 */
static int
memfd_mmap(file_t *fp, off_t *offp, size_t size, int prot, int *flagsp,
    int *advicep, struct uvm_object **uobjp, int *maxprotp)
{
	struct memfd *mfd = fp->f_memfd;
	struct uvm_object *uobj;

	/* uvm_mmap guarantees page-aligned offset and size. */
	KASSERT(*offp == round_page(*offp));
	KASSERT(size == round_page(size));
	KASSERT(size > 0);

	/* The mapping must lie entirely inside the file. */
	if (*offp < 0 || *offp + size > mfd->mfd_size)
		return EINVAL;

	/* Private mappings are exempt from the write seals. */
	if ((*flagsp & MAP_PRIVATE) == 0 && (prot & VM_PROT_WRITE) &&
	    (mfd->mfd_seals & F_SEAL_ANY_WRITE))
		return EPERM;

	uobj = mfd->mfd_uobj;
	uao_reference(uobj);

	*uobjp = uobj;
	*maxprotp = prot;
	*advicep = UVM_ADV_RANDOM;

	return 0;
}
/*
 * fo_seek for memfd: compute a new file offset from delta and whence
 * (SEEK_SET, SEEK_CUR relative to fp->f_offset, SEEK_END relative to
 * mfd_size).
 *
 * The result is stored in *newoffp when newoffp is non-NULL, and in
 * fp->f_offset when FOF_UPDATE_OFFSET is set in flags.
 *
 * Returns 0 on success, or EINVAL for an unknown whence or when the
 * resulting offset would be negative (nothing is modified in that
 * case).
 */
static int
memfd_seek(file_t *fp, off_t delta, int whence, off_t *newoffp,
    int flags)
{
	off_t newoff;

	switch (whence) {
	case SEEK_CUR:
		newoff = fp->f_offset + delta;
		break;

	case SEEK_END:
		newoff = fp->f_memfd->mfd_size + delta;
		break;

	case SEEK_SET:
		newoff = delta;
		break;

	default:
		return EINVAL;
	}

	/*
	 * A negative offset is never valid for this file: memfd_read()
	 * and memfd_write() reject it, so do not let it be stored.
	 */
	if (newoff < 0)
		return EINVAL;

	if (newoffp)
		*newoffp = newoff;
	if (flags & FOF_UPDATE_OFFSET)
		fp->f_offset = newoff;

	return 0;
}
/*
 * fo_truncate for memfd: set the file size to length.
 *
 * Returns EINVAL for a negative length and EPERM when the change would
 * shrink a file sealed with F_SEAL_SHRINK or grow one sealed with
 * F_SEAL_GROW.  Otherwise returns 0, or the error from pgo_put when
 * pages were released while shrinking.
 *
 * Also called by memfd_write() to grow the file ahead of a write.
 */
static int
memfd_truncate(file_t *fp, off_t length)
{
struct memfd *mfd = fp->f_memfd;
int error = 0;
voff_t start, end;
if (length < 0)
return EINVAL;
/* Same size: nothing to do, and mtime is left alone. */
if (length == mfd->mfd_size)
return 0;
/* NOTE(review): the seal checks below run before mfd_lock is taken. */
if ((mfd->mfd_seals & F_SEAL_SHRINK) && length < mfd->mfd_size)
return EPERM;
if ((mfd->mfd_seals & F_SEAL_GROW) && length > mfd->mfd_size)
return EPERM;
mutex_enter(&mfd->mfd_lock);
/* Growing: zero the newly exposed range [mfd_size, length). */
if (length > mfd->mfd_size)
ubc_zerorange(mfd->mfd_uobj, mfd->mfd_size,
length - mfd->mfd_size, 0);
else {
/* length < mfd->mfd_size, so try to get rid of excess pages */
start = round_page(length);
end = round_page(mfd->mfd_size);
if (start < end) { /* we actually have pages to remove */
rw_enter(mfd->mfd_uobj->vmobjlock, RW_WRITER);
error = (*mfd->mfd_uobj->pgops->pgo_put)(mfd->mfd_uobj,
start, end, PGO_FREE);
/* pgo_put drops vmobjlock */
}
}
/*
 * NOTE(review): mtime and mfd_size are updated even when pgo_put
 * reported an error above.
 */
getnanotime(&mfd->mfd_mtime);
mfd->mfd_size = length;
mutex_exit(&mfd->mfd_lock);
return error;
}

View File

@ -1,4 +1,4 @@
$NetBSD: syscalls.master,v 1.309 2021/11/01 05:07:17 thorpej Exp $
$NetBSD: syscalls.master,v 1.310 2023/07/10 02:33:04 christos Exp $
; @(#)syscalls.master 8.2 (Berkeley) 1/13/94
@ -1049,3 +1049,5 @@
498 STD { int|sys||__acl_aclcheck_fd(int filedes, \
acl_type_t type, struct acl *aclp); }
499 STD RUMP { long|sys||lpathconf(const char *path, int name); }
500 STD { int|sys||memfd_create(const char *name, \
unsigned int flags); }

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_syscalls.c,v 1.559 2023/04/29 06:34:20 riastradh Exp $ */
/* $NetBSD: vfs_syscalls.c,v 1.560 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 2008, 2009, 2019, 2020 The NetBSD Foundation, Inc.
@ -70,7 +70,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.559 2023/04/29 06:34:20 riastradh Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_syscalls.c,v 1.560 2023/07/10 02:31:55 christos Exp $");
#ifdef _KERNEL_OPT
#include "opt_fileassoc.h"
@ -4149,34 +4149,19 @@ sys_ftruncate(struct lwp *l, const struct sys_ftruncate_args *uap, register_t *r
syscallarg(int) pad;
syscallarg(off_t) length;
} */
struct vattr vattr;
struct vnode *vp;
file_t *fp;
int error;
int error, fd = SCARG(uap, fd);
if (SCARG(uap, length) < 0)
return EINVAL;
fp = fd_getfile(fd);
if (fp == NULL)
return EBADF;
if (fp->f_ops->fo_truncate == NULL)
error = EOPNOTSUPP;
else
error = (*fp->f_ops->fo_truncate)(fp, SCARG(uap, length));
/* fd_getvnode() will use the descriptor for us */
if ((error = fd_getvnode(SCARG(uap, fd), &fp)) != 0)
return (error);
if ((fp->f_flag & FWRITE) == 0) {
error = EINVAL;
goto out;
}
vp = fp->f_vnode;
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
if (vp->v_type == VDIR)
error = EISDIR;
else if ((error = vn_writechk(vp)) == 0) {
vattr_null(&vattr);
vattr.va_size = SCARG(uap, length);
error = VOP_SETATTR(vp, &vattr, fp->f_cred);
}
VOP_UNLOCK(vp);
out:
fd_putfile(SCARG(uap, fd));
return (error);
fd_putfile(fd);
return error;
}
/*

View File

@ -1,4 +1,4 @@
/* $NetBSD: vfs_vnops.c,v 1.241 2023/04/22 13:53:02 riastradh Exp $ */
/* $NetBSD: vfs_vnops.c,v 1.242 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 2009 The NetBSD Foundation, Inc.
@ -66,7 +66,7 @@
*/
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.241 2023/04/22 13:53:02 riastradh Exp $");
__KERNEL_RCSID(0, "$NetBSD: vfs_vnops.c,v 1.242 2023/07/10 02:31:55 christos Exp $");
#include "veriexec.h"
@ -125,6 +125,7 @@ static int vn_seek(struct file *, off_t, int, off_t *, int);
static int vn_advlock(struct file *, void *, int, struct flock *, int);
static int vn_fpathconf(struct file *, int, register_t *);
static int vn_posix_fadvise(struct file *, off_t, off_t, int);
static int vn_truncate(file_t *, off_t);
const struct fileops vnops = {
.fo_name = "vn",
@ -142,6 +143,7 @@ const struct fileops vnops = {
.fo_advlock = vn_advlock,
.fo_fpathconf = vn_fpathconf,
.fo_posix_fadvise = vn_posix_fadvise,
.fo_truncate = vn_truncate,
};
/*
@ -1331,6 +1333,33 @@ vn_posix_fadvise(struct file *fp, off_t offset, off_t len, int advice)
return error;
}
/*
 * vn_truncate:
 *
 *	fo_truncate method for vnode-backed files: set the size of the
 *	vnode behind `fp' to `length'.
 *
 *	Returns EINVAL when `length' is negative or the file was not
 *	opened for writing, EISDIR when the vnode is a directory, and
 *	otherwise whatever vn_writechk() or VOP_SETATTR() report.
 *	The vnode is held exclusively locked across the check and the
 *	attribute update.
 */
static int
vn_truncate(file_t *fp, off_t length)
{
	struct vnode *vp;
	int error;

	/* Both precondition failures yield the same errno. */
	if (length < 0 || (fp->f_flag & FWRITE) == 0)
		return EINVAL;

	vp = fp->f_vnode;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
	} else {
		error = vn_writechk(vp);
		if (error == 0) {
			struct vattr va;

			/* Only va_size is set; everything else stays "null". */
			vattr_null(&va);
			va.va_size = length;
			error = VOP_SETATTR(vp, &va, fp->f_cred);
		}
	}
	VOP_UNLOCK(vp);

	return error;
}
/*
* Check that the vnode is still valid, and if so
* acquire requested lock.

View File

@ -1,4 +1,4 @@
/* $NetBSD: fcntl.h,v 1.54 2020/03/30 20:17:42 kamil Exp $ */
/* $NetBSD: fcntl.h,v 1.55 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 1983, 1990, 1993
@ -200,6 +200,8 @@
#define F_GETNOSIGPIPE 13 /* get SIGPIPE disposition */
#define F_SETNOSIGPIPE 14 /* set SIGPIPE disposition */
#define F_GETPATH 15 /* get pathname associated with fd */
#define F_ADD_SEALS 16 /* set seals */
#define F_GET_SEALS 17 /* get seals */
#endif
/* file descriptor flags (F_GETFD, F_SETFD) */
@ -215,6 +217,15 @@
#define F_POSIX 0x040 /* Use POSIX semantics for lock */
#endif
/*
 * types of seals (F_ADD_SEALS, F_GET_SEALS)
 *
 * Each seal occupies its own bit, so several seals can be combined
 * into (and tested against) a single int mask.
 */
#if defined(_NETBSD_SOURCE)
#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
#define F_SEAL_GROW 0x0004 /* prevent file from growing */
#define F_SEAL_WRITE 0x0008 /* prevent writes */
#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */
#endif
/* Constants for fcntl's passed to the underlying fs - like ioctl's. */
#if defined(_NETBSD_SOURCE)
#define F_PARAM_MASK 0xfff

View File

@ -1,4 +1,4 @@
/* $NetBSD: file.h,v 1.92 2023/04/22 13:53:02 riastradh Exp $ */
/* $NetBSD: file.h,v 1.93 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 2009 The NetBSD Foundation, Inc.
@ -103,6 +103,7 @@ struct fileops {
int (*fo_fpathconf) (struct file *, int, register_t *);
int (*fo_posix_fadvise)
(struct file *, off_t, off_t, int);
int (*fo_truncate) (struct file *, off_t);
};
union file_data {
@ -121,6 +122,7 @@ union file_data {
struct mqueue *fd_mq; // DTYPE_MQUEUE
struct ksem *fd_ks; // DTYPE_SEM
struct iscsifd *fd_iscsi; // DTYPE_MISC (iscsi)
struct memfd *fd_memfd; // DTYPE_MEMFD
};
/*
@ -160,6 +162,7 @@ struct file {
#define f_ksem f_undata.fd_ks
#define f_eventfd f_undata.fd_eventfd
#define f_timerfd f_undata.fd_timerfd
#define f_memfd f_undata.fd_memfd
#define f_rndctx f_undata.fd_rndctx
#define f_audioctx f_undata.fd_audioctx
@ -184,10 +187,11 @@ struct file {
#define DTYPE_SEM 8 /* semaphore */
#define DTYPE_EVENTFD 9 /* eventfd */
#define DTYPE_TIMERFD 10 /* timerfd */
#define DTYPE_MEMFD 11 /* memfd */
#define DTYPE_NAMES \
"0", "file", "socket", "pipe", "kqueue", "misc", "crypto", "mqueue", \
"semaphore", "eventfd", "timerfd"
"semaphore", "eventfd", "timerfd", "memfd"
#ifdef _KERNEL

View File

@ -1,4 +1,4 @@
/* $NetBSD: mman.h,v 1.62 2019/12/06 19:37:43 christos Exp $ */
/* $NetBSD: mman.h,v 1.63 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 1982, 1986, 1993
@ -212,7 +212,13 @@ typedef __off_t off_t; /* file offset */
implemented in UVM */
#define MAP_INHERIT_ZERO 4 /* zero in child */
#define MAP_INHERIT_DEFAULT MAP_INHERIT_COPY
#endif
/*
* Flags to memfd_create
*/
#define MFD_CLOEXEC 0x1U
#define MFD_ALLOW_SEALING 0x2U
#endif /* _NETBSD_SOURCE */
#ifndef _KERNEL
@ -234,12 +240,31 @@ int madvise(void *, size_t, int);
int mincore(void *, size_t, char *);
int minherit(void *, size_t, int);
void * mremap(void *, size_t, void *, size_t, int);
int memfd_create(const char *, unsigned int);
#endif
int posix_madvise(void *, size_t, int);
int shm_open(const char *, int, mode_t);
int shm_unlink(const char *);
__END_DECLS
#else
#include <sys/syslimits.h> /* for NAME_MAX */
#include <sys/timespec.h> /* for struct timespec */
#include <sys/mutex.h> /* for kmutex_t */
/*
 * Per-object state of a memfd.  Only visible when _KERNEL is defined
 * (this is the #else branch of the #ifndef _KERNEL prototypes above).
 */
struct memfd {
/* object name; NAME_MAX characters plus one more byte (presumably the NUL) */
char mfd_name[NAME_MAX+1];
/* NOTE(review): presumably the UVM object backing the contents -- confirm */
struct uvm_object *mfd_uobj;
/* NOTE(review): presumably the current size in bytes -- confirm */
size_t mfd_size;
/* NOTE(review): presumably a mask of F_SEAL_* bits -- confirm */
int mfd_seals;
kmutex_t mfd_lock; /* for truncate */
/* NOTE(review): presumably birth, last-access and last-modification times -- confirm */
struct timespec mfd_btime;
struct timespec mfd_atime;
struct timespec mfd_mtime;
};
#endif /* !_KERNEL */
#endif /* !_SYS_MMAN_H_ */

View File

@ -1,4 +1,4 @@
/* $NetBSD: fstat.c,v 1.117 2022/10/28 05:27:16 ozaki-r Exp $ */
/* $NetBSD: fstat.c,v 1.118 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 1988, 1993
@ -39,7 +39,7 @@ __COPYRIGHT("@(#) Copyright (c) 1988, 1993\
#if 0
static char sccsid[] = "@(#)fstat.c 8.3 (Berkeley) 5/2/95";
#else
__RCSID("$NetBSD: fstat.c,v 1.117 2022/10/28 05:27:16 ozaki-r Exp $");
__RCSID("$NetBSD: fstat.c,v 1.118 2023/07/10 02:31:55 christos Exp $");
#endif
#endif /* not lint */
@ -548,6 +548,7 @@ ftrans(fdfile_t *fp, int i)
case DTYPE_CRYPTO:
case DTYPE_MQUEUE:
case DTYPE_SEM:
case DTYPE_MEMFD:
if (checkfile == 0)
misctrans(&file, i);
break;

View File

@ -1,4 +1,4 @@
/* $NetBSD: misc.c,v 1.24 2020/09/13 04:14:48 isaki Exp $ */
/* $NetBSD: misc.c,v 1.25 2023/07/10 02:31:55 christos Exp $ */
/*-
* Copyright (c) 2008 The NetBSD Foundation, Inc.
@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
__RCSID("$NetBSD: misc.c,v 1.24 2020/09/13 04:14:48 isaki Exp $");
__RCSID("$NetBSD: misc.c,v 1.25 2023/07/10 02:31:55 christos Exp $");
#include <stdbool.h>
#include <sys/param.h>
@ -56,6 +56,9 @@ __RCSID("$NetBSD: misc.c,v 1.24 2020/09/13 04:14:48 isaki Exp $");
#undef _KERNEL
#include <sys/cprng.h>
#include <sys/vnode.h>
#define _KERNEL
#include <sys/mman.h>
#undef _KERNEL
#include <sys/mount.h>
#include <net/bpfdesc.h>
@ -110,7 +113,9 @@ static struct nlist nl[] = {
{ .n_name = "audio_fileops" },
#define NL_PAD 19
{ .n_name = "pad_fileops" },
#define NL_MAX 20
#define NL_MEMFD 20
{ .n_name = "memfd_fileops" },
#define NL_MAX 21
{ .n_name = NULL }
};
@ -263,6 +268,40 @@ p_audio(struct file *f)
return 0;
}
/*
 * p_memfd_seal: print one seal name if it is present in a seal mask.
 *
 *	seen	nonzero if an earlier seal name has already been printed
 *	all	the full seal mask being decoded
 *	target	the seal bit(s) this call looks for
 *	name	text to print when `all' contains any bit of `target'
 *
 *	The name is preceded by "|" when `seen' is nonzero.  Returns 1
 *	if `seen' was nonzero or this call matched, 0 otherwise, so the
 *	result can be fed back in as `seen' for the next call.
 */
static int
p_memfd_seal(int seen, int all, int target, const char *name)
{
	const int present = (all & target) != 0;
	const char *separator = seen ? "|" : "";

	if (present)
		(void)printf("%s%s", separator, name);

	return seen != 0 || present;
}
/*
 * p_memfd: print the name and the seals of the memfd behind `f'.
 *
 *	The struct memfd is copied in with KVM_READ() from f->f_data.
 *	If that fails, a diagnostic is issued through dprintf() and
 *	nothing else is printed.  Seals are shown as "0" when the mask
 *	is empty, otherwise as the matching F_SEAL_* names joined by "|".
 *
 *	Always returns 0.
 */
static int
p_memfd(struct file *f)
{
	/* Seals in the order they are printed. */
	static const struct {
		int seal;
		const char *name;
	} seals[] = {
		{ F_SEAL_SEAL,		"F_SEAL_SEAL" },
		{ F_SEAL_SHRINK,	"F_SEAL_SHRINK" },
		{ F_SEAL_GROW,		"F_SEAL_GROW" },
		{ F_SEAL_WRITE,		"F_SEAL_WRITE" },
		{ F_SEAL_FUTURE_WRITE,	"F_SEAL_FUTURE_WRITE" },
	};
	struct memfd mfd;
	int seal_yet = 0;
	size_t i;

	if (!KVM_READ(f->f_data, &mfd, sizeof(mfd))) {
		dprintf("can't read memfd at %p for pid %d", f->f_data, Pid);
		return 0;
	}

	/*
	 * Nothing here guarantees that the buffer we just copied is
	 * NUL-terminated, so never read past the end of mfd_name.
	 */
	(void)printf("* %.*s, seals=", (int)sizeof(mfd.mfd_name),
	    mfd.mfd_name);

	if (mfd.mfd_seals == 0)
		(void)printf("0");
	else {
		for (i = 0; i < sizeof(seals) / sizeof(seals[0]); i++)
			seal_yet = p_memfd_seal(seal_yet, mfd.mfd_seals,
			    seals[i].seal, seals[i].name);
	}

	oprint(f, "\n");
	return 0;
}
int
pmisc(struct file *f, const char *name)
{
@ -310,6 +349,8 @@ pmisc(struct file *f, const char *name)
case NL_PAD:
printf("* pad %p", f->f_data);
break;
case NL_MEMFD:
return p_memfd(f);
case NL_MAX:
printf("* %s ops=%p %p", name, f->f_ops, f->f_data);
break;