From 276ef2237848f9276aa522fb1629389cf68b910f Mon Sep 17 00:00:00 2001 From: thorpej Date: Sun, 26 Apr 2020 18:53:31 +0000 Subject: [PATCH] Add a NetBSD native futex implementation, mostly written by riastradh@. Map the COMPAT_LINUX futex calls to the native ones. --- distrib/sets/lists/comp/mi | 3 +- distrib/sets/lists/tests/mi | 4 +- lib/libc/include/futex_private.h | 64 + sys/compat/linux/arch/alpha/syscalls.master | 16 +- sys/compat/linux/arch/amd64/syscalls.master | 16 +- sys/compat/linux/arch/arm/syscalls.master | 16 +- sys/compat/linux/arch/i386/syscalls.master | 11 +- sys/compat/linux/arch/m68k/syscalls.master | 16 +- sys/compat/linux/arch/mips/syscalls.master | 16 +- sys/compat/linux/arch/powerpc/syscalls.master | 16 +- sys/compat/linux/common/linux_emuldata.h | 5 +- sys/compat/linux/common/linux_exec.c | 34 +- sys/compat/linux/common/linux_futex.c | 774 +------ sys/compat/linux/common/linux_futex.h | 93 - sys/compat/linux/common/linux_mod.c | 7 +- sys/compat/linux32/arch/amd64/syscalls.master | 15 +- sys/compat/linux32/common/linux32_misc.c | 70 +- sys/compat/netbsd32/files.netbsd32 | 3 +- sys/compat/netbsd32/netbsd32_futex.c | 134 ++ sys/compat/netbsd32/syscalls.master | 9 +- sys/kern/files.kern | 3 +- sys/kern/init_main.c | 7 +- sys/kern/kern_lwp.c | 14 +- sys/kern/kern_proc.c | 11 +- sys/kern/sys_futex.c | 1977 +++++++++++++++++ sys/kern/syscalls.master | 9 +- sys/sys/Makefile | 4 +- sys/sys/futex.h | 186 ++ sys/sys/lwp.h | 4 +- tests/lib/libc/sys/Makefile | 7 +- tests/lib/libc/sys/t_futex_ops.c | 1294 +++++++++++ tests/lib/libc/sys/t_futex_robust.c | 408 ++++ 32 files changed, 4256 insertions(+), 990 deletions(-) create mode 100644 lib/libc/include/futex_private.h delete mode 100644 sys/compat/linux/common/linux_futex.h create mode 100644 sys/compat/netbsd32/netbsd32_futex.c create mode 100644 sys/kern/sys_futex.c create mode 100644 sys/sys/futex.h create mode 100644 tests/lib/libc/sys/t_futex_ops.c create mode 100644 
tests/lib/libc/sys/t_futex_robust.c diff --git a/distrib/sets/lists/comp/mi b/distrib/sets/lists/comp/mi index bd92b700758b..9e2353ec0be8 100644 --- a/distrib/sets/lists/comp/mi +++ b/distrib/sets/lists/comp/mi @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.2321 2020/04/04 19:26:51 christos Exp $ +# $NetBSD: mi,v 1.2322 2020/04/26 18:53:31 thorpej Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. ./etc/mtree/set.comp comp-sys-root @@ -3059,6 +3059,7 @@ ./usr/include/sys/float_ieee754.h comp-c-include ./usr/include/sys/fnv_hash.h comp-obsolete obsolete ./usr/include/sys/fstypes.h comp-c-include +./usr/include/sys/futex.h comp-c-include ./usr/include/sys/gcq.h comp-c-include ./usr/include/sys/gmon.h comp-c-include ./usr/include/sys/gpio.h comp-c-include diff --git a/distrib/sets/lists/tests/mi b/distrib/sets/lists/tests/mi index 9c5aa8d313e0..7229b36f31c5 100644 --- a/distrib/sets/lists/tests/mi +++ b/distrib/sets/lists/tests/mi @@ -1,4 +1,4 @@ -# $NetBSD: mi,v 1.836 2020/04/19 13:22:58 maxv Exp $ +# $NetBSD: mi,v 1.837 2020/04/26 18:53:32 thorpej Exp $ # # Note: don't delete entries from here - mark them as "obsolete" instead. 
# @@ -3120,6 +3120,8 @@ ./usr/tests/lib/libc/sys/t_dup tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/sys/t_fork tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/sys/t_fsync tests-lib-tests compattestfile,atf +./usr/tests/lib/libc/sys/t_futex_ops tests-lib-tests compattestfile,atf +./usr/tests/lib/libc/sys/t_futex_robust tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/sys/t_getcontext tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/sys/t_getgroups tests-lib-tests compattestfile,atf ./usr/tests/lib/libc/sys/t_getitimer tests-lib-tests compattestfile,atf diff --git a/lib/libc/include/futex_private.h b/lib/libc/include/futex_private.h new file mode 100644 index 000000000000..fa3c139721a0 --- /dev/null +++ b/lib/libc/include/futex_private.h @@ -0,0 +1,64 @@ +/* $NetBSD: futex_private.h,v 1.1 2020/04/26 18:53:32 thorpej Exp $ */ + +/*- + * Copyright (c) 2019 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __LIBC_FUTEX_PRIVATE +#define __LIBC_FUTEX_PRIVATE + +#if defined(_LIBC) +#include "namespace.h" +#endif + +#include +#include +#include +#include + +static inline int __unused +__futex(volatile int *uaddr, int op, int val, const struct timespec *timeout, + volatile int *uaddr2, int val2, int val3) +{ + return syscall(SYS___futex, uaddr, op, val, timeout, uaddr2, + val2, val3); +} + +static inline int __unused +__futex_set_robust_list(void *head, size_t len) +{ + return syscall(SYS___futex_set_robust_list, head, len); +} + +static inline int __unused +__futex_get_robust_list(lwpid_t lwpid, void **headp, size_t *lenp) +{ + return syscall(SYS___futex_get_robust_list, lwpid, headp, lenp); +} + +#endif /* __LIBC_FUTEX_PRIVATE */ diff --git a/sys/compat/linux/arch/alpha/syscalls.master b/sys/compat/linux/arch/alpha/syscalls.master index 2331443411bc..f740b086284b 100644 --- a/sys/compat/linux/arch/alpha/syscalls.master +++ b/sys/compat/linux/arch/alpha/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.96 2019/11/09 23:44:31 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.97 2020/04/26 18:53:32 thorpej Exp $ ; ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -740,11 +740,15 @@ struct linux_timespec *timeout, \ linux_sigset_t *sigset); } 465 UNIMPL unshare -466 STD { int|linux_sys||set_robust_list( \ - struct linux_robust_list_head *head, size_t len); } -467 STD { int|linux_sys||get_robust_list(int pid, \ - struct 
linux_robust_list_head **head, \ - size_t *len); } + ; + ; The NetBSD native robust list calls have different + ; argument names / types, but they are ABI-compatible + ; with Linux. + ; +466 NOARGS { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +467 NOARGS { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } 468 UNIMPL splice 469 UNIMPL sync_file_range 470 UNIMPL tee diff --git a/sys/compat/linux/arch/amd64/syscalls.master b/sys/compat/linux/arch/amd64/syscalls.master index 7307cf0e7264..739924c2e2e0 100644 --- a/sys/compat/linux/arch/amd64/syscalls.master +++ b/sys/compat/linux/arch/amd64/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.61 2019/11/09 23:44:31 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.62 2020/04/26 18:53:32 thorpej Exp $ ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -497,11 +497,15 @@ struct linux_timespec *timeout, \ linux_sigset_t *sigset); } 272 UNIMPL unshare -273 STD { int|linux_sys||set_robust_list( \ - struct linux_robust_list_head *head, size_t len); } -274 STD { int|linux_sys||get_robust_list(int pid, \ - struct linux_robust_list_head **head, \ - size_t *len); } + ; + ; The NetBSD native robust list calls have different + ; argument names / types, but they are ABI-compatible + ; with Linux. 
+ ; +273 NOARGS { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +274 NOARGS { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } 275 UNIMPL splice 276 UNIMPL tee 277 UNIMPL sync_file_range diff --git a/sys/compat/linux/arch/arm/syscalls.master b/sys/compat/linux/arch/arm/syscalls.master index c0431b54bd5a..7390862f329e 100644 --- a/sys/compat/linux/arch/arm/syscalls.master +++ b/sys/compat/linux/arch/arm/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.68 2019/11/09 23:44:31 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.69 2020/04/26 18:53:32 thorpej Exp $ ; Derived from sys/compat/linux/arch/*/syscalls.master ; and from Linux 2.4.12 arch/arm/kernel/calls.S @@ -538,11 +538,15 @@ struct linux_timespec *timeout, \ linux_sigset_t *sigset); } 337 UNIMPL unshare -338 STD { int|linux_sys||set_robust_list( \ - struct linux_robust_list_head *head, size_t len); } -339 STD { int|linux_sys||get_robust_list(int pid, \ - struct linux_robust_list_head **head, \ - size_t *len); } + ; + ; The NetBSD native robust list calls have different + ; argument names / types, but they are ABI-compatible + ; with Linux. 
+ ; +338 NOARGS { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +339 NOARGS { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } 340 UNIMPL splice 341 UNIMPL sync_file_range2 342 UNIMPL tee diff --git a/sys/compat/linux/arch/i386/syscalls.master b/sys/compat/linux/arch/i386/syscalls.master index 0fa1a46bf426..56a58b1eaced 100644 --- a/sys/compat/linux/arch/i386/syscalls.master +++ b/sys/compat/linux/arch/i386/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.123 2019/11/09 23:44:32 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.124 2020/04/26 18:53:32 thorpej Exp $ ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -511,11 +511,10 @@ struct linux_timespec *timeout, \ linux_sigset_t *sigset); } 310 UNIMPL unshare -311 STD { int|linux_sys||set_robust_list( \ - struct linux_robust_list_head *head, size_t len); } -312 STD { int|linux_sys||get_robust_list(int pid, \ - struct linux_robust_list_head **head, \ - size_t *len); } +311 NOARGS { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +312 NOARGS { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } 313 UNIMPL splice 314 UNIMPL sync_file_range 315 UNIMPL tee diff --git a/sys/compat/linux/arch/m68k/syscalls.master b/sys/compat/linux/arch/m68k/syscalls.master index f9c1b6cc58bd..271238f86c7f 100644 --- a/sys/compat/linux/arch/m68k/syscalls.master +++ b/sys/compat/linux/arch/m68k/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.94 2019/11/09 23:44:32 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.95 2020/04/26 18:53:32 thorpej Exp $ ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -519,11 +519,15 @@ struct linux_timespec *timeout, \ linux_sigset_t *sigset); } 303 UNIMPL unshare -304 STD { int|linux_sys||set_robust_list( \ - struct linux_robust_list_head *head, size_t len); } -305 STD { int|linux_sys||get_robust_list(int pid, \ - struct linux_robust_list_head **head, \ - size_t *len); } + ; + ; The NetBSD 
native robust list calls have different + ; argument names / types, but they are ABI-compatible + ; with Linux. + ; +304 NOARGS { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +305 NOARGS { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } 306 UNIMPL splice 307 UNIMPL sync_file_range 308 UNIMPL tee diff --git a/sys/compat/linux/arch/mips/syscalls.master b/sys/compat/linux/arch/mips/syscalls.master index 48545d4babf7..d91f4c84b55e 100644 --- a/sys/compat/linux/arch/mips/syscalls.master +++ b/sys/compat/linux/arch/mips/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.67 2019/11/09 23:44:32 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.68 2020/04/26 18:53:32 thorpej Exp $ ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -514,11 +514,15 @@ 306 UNIMPL tee 307 UNIMPL vmsplice 308 UNIMPL move_pages -309 STD { int|linux_sys||set_robust_list( \ - struct linux_robust_list_head *head, size_t len); } -310 STD { int|linux_sys||get_robust_list(int pid, \ - struct linux_robust_list_head **head, \ - size_t *len); } + ; + ; The NetBSD native robust list calls have different + ; argument names / types, but they are ABI-compatible + ; with Linux. 
+ ; +309 NOARGS { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +310 NOARGS { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } 311 UNIMPL kexec_load 312 UNIMPL getcpu 313 UNIMPL epoll_pwait diff --git a/sys/compat/linux/arch/powerpc/syscalls.master b/sys/compat/linux/arch/powerpc/syscalls.master index eb7c68d9ac21..fe332a137fe7 100644 --- a/sys/compat/linux/arch/powerpc/syscalls.master +++ b/sys/compat/linux/arch/powerpc/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.73 2019/11/09 23:44:32 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.74 2020/04/26 18:53:32 thorpej Exp $ ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -514,11 +514,15 @@ linux_umode_t mode); } 298 STD { int|linux_sys||faccessat(int fd, const char *path, \ int amode); } -299 STD { int|linux_sys||set_robust_list( \ - struct linux_robust_list_head *head, size_t len); } -300 STD { int|linux_sys||get_robust_list(int pid, \ - struct linux_robust_list_head **head, \ - size_t *len); } + ; + ; The NetBSD native robust list calls have different + ; argument names / types, but they are ABI-compatible + ; with Linux. + ; +299 NOARGS { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +300 NOARGS { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } 301 UNIMPL move_pages 302 UNIMPL getcpu 303 UNIMPL epoll_wait diff --git a/sys/compat/linux/common/linux_emuldata.h b/sys/compat/linux/common/linux_emuldata.h index 00d2a8f4c745..b6e66445d09e 100644 --- a/sys/compat/linux/common/linux_emuldata.h +++ b/sys/compat/linux/common/linux_emuldata.h @@ -1,4 +1,4 @@ -/* $NetBSD: linux_emuldata.h,v 1.18 2010/11/02 18:18:07 chs Exp $ */ +/* $NetBSD: linux_emuldata.h,v 1.19 2020/04/26 18:53:33 thorpej Exp $ */ /*- * Copyright (c) 1998,2002 The NetBSD Foundation, Inc. @@ -29,8 +29,6 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ -#include - #ifndef _COMMON_LINUX_EMULDATA_H #define _COMMON_LINUX_EMULDATA_H @@ -47,7 +45,6 @@ struct linux_emuldata { void *led_child_tidptr; /* Used during clone() */ void *led_clear_tid; /* Own TID to clear on exit */ - struct linux_robust_list_head *led_robust_head; long led_personality; }; diff --git a/sys/compat/linux/common/linux_exec.c b/sys/compat/linux/common/linux_exec.c index 7c4a92233145..b2865d430e29 100644 --- a/sys/compat/linux/common/linux_exec.c +++ b/sys/compat/linux/common/linux_exec.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp $ */ +/* $NetBSD: linux_exec.c,v 1.123 2020/04/26 18:53:33 thorpej Exp $ */ /*- * Copyright (c) 1994, 1995, 1998, 2000, 2007, 2008, 2020 @@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.123 2020/04/26 18:53:33 thorpej Exp $"); #include #include @@ -44,6 +44,7 @@ __KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp #include #include #include +#include #include #include @@ -61,7 +62,6 @@ __KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.122 2020/04/24 03:22:06 thorpej Exp #include #include #include -#include #include #include @@ -168,32 +168,24 @@ void linux_e_lwp_exit(struct lwp *l) { struct linux_emuldata *led; - struct linux_sys_futex_args cup; register_t retval; int error, zero = 0; led = l->l_emuldata; - if (led->led_clear_tid == NULL) { - return; - } - /* Emulate LINUX_CLONE_CHILD_CLEARTID */ - error = copyout(&zero, led->led_clear_tid, sizeof(zero)); + if (led->led_clear_tid != NULL) { + /* Emulate LINUX_CLONE_CHILD_CLEARTID */ + error = copyout(&zero, led->led_clear_tid, sizeof(zero)); #ifdef DEBUG_LINUX - if (error != 0) - printf("%s: cannot clear TID\n", __func__); + if (error != 0) + printf("%s: cannot clear TID\n", __func__); #endif - SCARG(&cup, uaddr) = led->led_clear_tid; - SCARG(&cup, op) = LINUX_FUTEX_WAKE; - SCARG(&cup, val) = 
0x7fffffff; /* Awake everyone */ - SCARG(&cup, timeout) = NULL; - SCARG(&cup, uaddr2) = NULL; - SCARG(&cup, val3) = 0; - if ((error = linux_sys_futex(curlwp, &cup, &retval)) != 0) - printf("%s: linux_sys_futex failed\n", __func__); - - release_futexes(l); + error = do_futex((int *)led->led_clear_tid, FUTEX_WAKE, + INT_MAX, NULL, NULL, 0, 0, &retval); + if (error) + printf("%s: linux_sys_futex failed\n", __func__); + } led = l->l_emuldata; l->l_emuldata = NULL; diff --git a/sys/compat/linux/common/linux_futex.c b/sys/compat/linux/common/linux_futex.c index 64e7e8ba7623..36a014a3506b 100644 --- a/sys/compat/linux/common/linux_futex.c +++ b/sys/compat/linux/common/linux_futex.c @@ -1,7 +1,11 @@ -/* $NetBSD: linux_futex.c,v 1.38 2020/03/14 18:08:38 ad Exp $ */ +/* $NetBSD: linux_futex.c,v 1.39 2020/04/26 18:53:33 thorpej Exp $ */ /*- - * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. + * Copyright (c) 2019 The NetBSD Foundation. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Taylor R. Campbell and Jason R. Thorpe. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -11,17 +15,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Emmanuel Dreyfus - * 4. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. 
* - * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS @@ -32,86 +30,23 @@ */ #include -__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.38 2020/03/14 18:08:38 ad Exp $"); +__KERNEL_RCSID(1, "$NetBSD: linux_futex.c,v 1.39 2020/04/26 18:53:33 thorpej Exp $"); +#include #include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include +#include #include #include #include #include -#include #include #include #include -struct futex; - -struct waiting_proc { - struct futex *wp_futex; - kcondvar_t wp_futex_cv; - TAILQ_ENTRY(waiting_proc) wp_list; - bool wp_onlist; -}; -struct futex { - void *f_uaddr; - int f_refcount; - uint32_t f_bitset; - LIST_ENTRY(futex) f_list; - TAILQ_HEAD(, waiting_proc) f_waiting_proc; -}; - -static LIST_HEAD(futex_list, futex) futex_list; -static kmutex_t futex_lock; - -#define FUTEX_LOCK mutex_enter(&futex_lock) -#define FUTEX_UNLOCK mutex_exit(&futex_lock) -#define FUTEX_LOCKASSERT KASSERT(mutex_owned(&futex_lock)) - -#define FUTEX_SYSTEM_LOCK KERNEL_LOCK(1, NULL) -#define FUTEX_SYSTEM_UNLOCK KERNEL_UNLOCK_ONE(0) - -#ifdef DEBUG_LINUX_FUTEX -int debug_futex = 1; -#define FUTEXPRINTF(a) do { if (debug_futex) printf a; } while (0) -#else 
-#define FUTEXPRINTF(a) -#endif - -void -linux_futex_init(void) -{ - FUTEXPRINTF(("%s: initializing futex\n", __func__)); - mutex_init(&futex_lock, MUTEX_DEFAULT, IPL_NONE); -} - -void -linux_futex_fini(void) -{ - FUTEXPRINTF(("%s: destroying futex\n", __func__)); - mutex_destroy(&futex_lock); -} - -static struct waiting_proc *futex_wp_alloc(void); -static void futex_wp_free(struct waiting_proc *); -static struct futex *futex_get(void *, uint32_t); -static void futex_ref(struct futex *); -static void futex_put(struct futex *); -static int futex_sleep(struct futex **, lwp_t *, int, struct waiting_proc *); -static int futex_wake(struct futex *, int, struct futex *, int); -static int futex_atomic_op(lwp_t *, int, void *); - int linux_sys_futex(struct lwp *l, const struct linux_sys_futex_args *uap, register_t *retval) @@ -125,687 +60,26 @@ linux_sys_futex(struct lwp *l, const struct linux_sys_futex_args *uap, syscallarg(int) val3; } */ struct linux_timespec lts; - struct timespec ts = { 0, 0 }; + struct timespec ts, *tsp = NULL; + int val2 = 0; int error; - if ((SCARG(uap, op) & LINUX_FUTEX_CMD_MASK) == LINUX_FUTEX_WAIT && + /* + * Linux overlays the "timeout" field and the "val2" field. + * "timeout" is only valid for FUTEX_WAIT on Linux. + */ + if ((SCARG(uap, op) & FUTEX_CMD_MASK) == FUTEX_WAIT && SCARG(uap, timeout) != NULL) { if ((error = copyin(SCARG(uap, timeout), &lts, sizeof(lts))) != 0) { return error; } linux_to_native_timespec(&ts, &lts); - } - return linux_do_futex(l, uap, &ts, retval); -} - -/* - * Note: TS can't be const because ts2timo destroys it. 
- */ -int -linux_do_futex(struct lwp *l, const struct linux_sys_futex_args *uap, - struct timespec *ts, register_t *retval) -{ - /* { - syscallarg(int *) uaddr; - syscallarg(int) op; - syscallarg(int) val; - syscallarg(const struct linux_timespec *) timeout; - syscallarg(int *) uaddr2; - syscallarg(int) val3; - } */ - int val, val3; - int ret; - int error = 0; - struct futex *f; - struct futex *newf; - int tout; - struct futex *f2; - struct waiting_proc *wp; - int op_ret, cmd; - clockid_t clk; - - cmd = SCARG(uap, op) & LINUX_FUTEX_CMD_MASK; - val3 = SCARG(uap, val3); - - if (SCARG(uap, op) & LINUX_FUTEX_CLOCK_REALTIME) { - switch (cmd) { - case LINUX_FUTEX_WAIT_BITSET: - case LINUX_FUTEX_WAIT: - clk = CLOCK_REALTIME; - break; - default: - return ENOSYS; - } - } else - clk = CLOCK_MONOTONIC; - - /* - * Our implementation provides only private futexes. Most of the apps - * should use private futexes but don't claim so. Therefore we treat - * all futexes as private by clearing the FUTEX_PRIVATE_FLAG. It works - * in most cases (ie. when futexes are not shared on file descriptor - * or between different processes). - * - * Note that we don't handle bitsets at all at the moment. We need - * to move from refcounting uaddr's to handling multiple futex entries - * pointing to the same uaddr, but having possibly different bitmask. - * Perhaps move to an implementation where each uaddr has a list of - * futexes. - */ - switch (cmd) { - case LINUX_FUTEX_WAIT: - val3 = FUTEX_BITSET_MATCH_ANY; - /*FALLTHROUGH*/ - case LINUX_FUTEX_WAIT_BITSET: - if ((error = ts2timo(clk, 0, ts, &tout, NULL)) != 0) { - if (error != ETIMEDOUT) - return error; - /* - * If the user process requests a non null - * timeout, make sure we do not turn it into - * an infinite timeout because tout is 0. - * - * We use a minimal timeout of 1/hz. Maybe it - * would make sense to just return ETIMEDOUT - * without sleeping. 
- */ - if (SCARG(uap, timeout) != NULL) - tout = 1; - else - tout = 0; - } - FUTEX_SYSTEM_LOCK; - if ((error = copyin(SCARG(uap, uaddr), - &val, sizeof(val))) != 0) { - FUTEX_SYSTEM_UNLOCK; - return error; - } - - if (val != SCARG(uap, val)) { - FUTEX_SYSTEM_UNLOCK; - return EWOULDBLOCK; - } - - FUTEXPRINTF(("FUTEX_WAIT %d.%d: val = %d, uaddr = %p, " - "*uaddr = %d, timeout = %lld.%09ld\n", - l->l_proc->p_pid, l->l_lid, SCARG(uap, val), - SCARG(uap, uaddr), val, (long long)ts->tv_sec, - ts->tv_nsec)); - - - wp = futex_wp_alloc(); - FUTEX_LOCK; - f = futex_get(SCARG(uap, uaddr), val3); - ret = futex_sleep(&f, l, tout, wp); - futex_put(f); - FUTEX_UNLOCK; - futex_wp_free(wp); - - FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, " - "ret = %d\n", l->l_proc->p_pid, l->l_lid, - SCARG(uap, uaddr), ret)); - - FUTEX_SYSTEM_UNLOCK; - switch (ret) { - case EWOULDBLOCK: /* timeout */ - return ETIMEDOUT; - break; - case EINTR: /* signal */ - return EINTR; - break; - case 0: /* FUTEX_WAKE received */ - FUTEXPRINTF(("FUTEX_WAIT %d.%d: uaddr = %p, got it\n", - l->l_proc->p_pid, l->l_lid, SCARG(uap, uaddr))); - return 0; - break; - default: - FUTEXPRINTF(("FUTEX_WAIT: unexpected ret = %d\n", ret)); - break; - } - - /* NOTREACHED */ - break; - - case LINUX_FUTEX_WAKE: - val = FUTEX_BITSET_MATCH_ANY; - /*FALLTHROUGH*/ - case LINUX_FUTEX_WAKE_BITSET: - /* - * XXX: Linux is able cope with different addresses - * corresponding to the same mapped memory in the sleeping - * and the waker process(es). 
- */ - FUTEXPRINTF(("FUTEX_WAKE %d.%d: uaddr = %p, val = %d\n", - l->l_proc->p_pid, l->l_lid, - SCARG(uap, uaddr), SCARG(uap, val))); - - FUTEX_SYSTEM_LOCK; - FUTEX_LOCK; - f = futex_get(SCARG(uap, uaddr), val3); - *retval = futex_wake(f, SCARG(uap, val), NULL, 0); - futex_put(f); - FUTEX_UNLOCK; - FUTEX_SYSTEM_UNLOCK; - - break; - - case LINUX_FUTEX_CMP_REQUEUE: - FUTEX_SYSTEM_LOCK; - - if ((error = copyin(SCARG(uap, uaddr), - &val, sizeof(val))) != 0) { - FUTEX_SYSTEM_UNLOCK; - return error; - } - - if (val != val3) { - FUTEX_SYSTEM_UNLOCK; - return EAGAIN; - } - - FUTEXPRINTF(("FUTEX_CMP_REQUEUE %d.%d: uaddr = %p, val = %d, " - "uaddr2 = %p, val2 = %d\n", - l->l_proc->p_pid, l->l_lid, - SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2), - (int)(unsigned long)SCARG(uap, timeout))); - - FUTEX_LOCK; - f = futex_get(SCARG(uap, uaddr), val3); - newf = futex_get(SCARG(uap, uaddr2), val3); - *retval = futex_wake(f, SCARG(uap, val), newf, - (int)(unsigned long)SCARG(uap, timeout)); - futex_put(f); - futex_put(newf); - FUTEX_UNLOCK; - - FUTEX_SYSTEM_UNLOCK; - break; - - case LINUX_FUTEX_REQUEUE: - FUTEX_SYSTEM_LOCK; - - FUTEXPRINTF(("FUTEX_REQUEUE %d.%d: uaddr = %p, val = %d, " - "uaddr2 = %p, val2 = %d\n", - l->l_proc->p_pid, l->l_lid, - SCARG(uap, uaddr), SCARG(uap, val), SCARG(uap, uaddr2), - (int)(unsigned long)SCARG(uap, timeout))); - - FUTEX_LOCK; - f = futex_get(SCARG(uap, uaddr), val3); - newf = futex_get(SCARG(uap, uaddr2), val3); - *retval = futex_wake(f, SCARG(uap, val), newf, - (int)(unsigned long)SCARG(uap, timeout)); - futex_put(f); - futex_put(newf); - FUTEX_UNLOCK; - - FUTEX_SYSTEM_UNLOCK; - break; - - case LINUX_FUTEX_FD: - FUTEXPRINTF(("%s: unimplemented op %d\n", __func__, cmd)); - return ENOSYS; - case LINUX_FUTEX_WAKE_OP: - FUTEX_SYSTEM_LOCK; - - FUTEXPRINTF(("FUTEX_WAKE_OP %d.%d: uaddr = %p, op = %d, " - "val = %d, uaddr2 = %p, val2 = %d\n", - l->l_proc->p_pid, l->l_lid, - SCARG(uap, uaddr), cmd, SCARG(uap, val), - SCARG(uap, uaddr2), - 
(int)(unsigned long)SCARG(uap, timeout))); - - FUTEX_LOCK; - f = futex_get(SCARG(uap, uaddr), val3); - f2 = futex_get(SCARG(uap, uaddr2), val3); - FUTEX_UNLOCK; - - /* - * This function returns positive number as results and - * negative as errors - */ - op_ret = futex_atomic_op(l, val3, SCARG(uap, uaddr2)); - FUTEX_LOCK; - if (op_ret < 0) { - futex_put(f); - futex_put(f2); - FUTEX_UNLOCK; - FUTEX_SYSTEM_UNLOCK; - return -op_ret; - } - - ret = futex_wake(f, SCARG(uap, val), NULL, 0); - futex_put(f); - if (op_ret > 0) { - op_ret = 0; - /* - * Linux abuses the address of the timespec parameter - * as the number of retries - */ - op_ret += futex_wake(f2, - (int)(unsigned long)SCARG(uap, timeout), NULL, 0); - ret += op_ret; - } - futex_put(f2); - FUTEX_UNLOCK; - FUTEX_SYSTEM_UNLOCK; - *retval = ret; - break; - default: - FUTEXPRINTF(("%s: unknown op %d\n", __func__, cmd)); - return ENOSYS; - } - return 0; -} - -static struct waiting_proc * -futex_wp_alloc(void) -{ - struct waiting_proc *wp; - - wp = kmem_zalloc(sizeof(*wp), KM_SLEEP); - cv_init(&wp->wp_futex_cv, "futex"); - return wp; -} - -static void -futex_wp_free(struct waiting_proc *wp) -{ - - cv_destroy(&wp->wp_futex_cv); - kmem_free(wp, sizeof(*wp)); -} - -static struct futex * -futex_get(void *uaddr, uint32_t bitset) -{ - struct futex *f; - - FUTEX_LOCKASSERT; - - LIST_FOREACH(f, &futex_list, f_list) { - if (f->f_uaddr == uaddr) { - f->f_refcount++; - return f; - } - } - - /* Not found, create it */ - f = kmem_zalloc(sizeof(*f), KM_SLEEP); - f->f_uaddr = uaddr; - f->f_bitset = bitset; - f->f_refcount = 1; - TAILQ_INIT(&f->f_waiting_proc); - LIST_INSERT_HEAD(&futex_list, f, f_list); - - return f; -} - -static void -futex_ref(struct futex *f) -{ - - FUTEX_LOCKASSERT; - - f->f_refcount++; -} - -static void -futex_put(struct futex *f) -{ - - FUTEX_LOCKASSERT; - - f->f_refcount--; - if (f->f_refcount == 0) { - KASSERT(TAILQ_EMPTY(&f->f_waiting_proc)); - LIST_REMOVE(f, f_list); - kmem_free(f, sizeof(*f)); - } -} - 
-static int -futex_sleep(struct futex **fp, lwp_t *l, int timeout, struct waiting_proc *wp) -{ - struct futex *f; - int ret; - - FUTEX_LOCKASSERT; - - f = *fp; - wp->wp_futex = f; - TAILQ_INSERT_TAIL(&f->f_waiting_proc, wp, wp_list); - wp->wp_onlist = true; - ret = cv_timedwait_sig(&wp->wp_futex_cv, &futex_lock, timeout); - - /* - * we may have been requeued to a different futex before we were - * woken up, so let the caller know which futex to put. if we were - * woken by futex_wake() then it took us off the waiting list, - * but if our sleep was interrupted or timed out then we might - * need to take ourselves off the waiting list. - */ - - f = wp->wp_futex; - if (wp->wp_onlist) { - TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); - } - *fp = f; - return ret; -} - -static int -futex_wake(struct futex *f, int n, struct futex *newf, int n2) -{ - struct waiting_proc *wp; - int count = 0; - - FUTEX_LOCKASSERT; - - /* - * wake up up to n threads waiting on this futex. - */ - - while (n--) { - wp = TAILQ_FIRST(&f->f_waiting_proc); - if (wp == NULL) - return count; - - KASSERT(f == wp->wp_futex); - TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); - wp->wp_onlist = false; - cv_signal(&wp->wp_futex_cv); - count++; - } - if (newf == NULL) - return count; - - /* - * then requeue up to n2 additional threads to newf - * (without waking them up). 
- */ - - while (n2--) { - wp = TAILQ_FIRST(&f->f_waiting_proc); - if (wp == NULL) - return count; - - KASSERT(f == wp->wp_futex); - TAILQ_REMOVE(&f->f_waiting_proc, wp, wp_list); - futex_put(f); - - wp->wp_futex = newf; - futex_ref(newf); - TAILQ_INSERT_TAIL(&newf->f_waiting_proc, wp, wp_list); - count++; - } - return count; -} - -static int -futex_atomic_op(lwp_t *l, int encoded_op, void *uaddr) -{ - const int op = (encoded_op >> 28) & 7; - const int cmp = (encoded_op >> 24) & 15; - const int cmparg = (encoded_op << 20) >> 20; - int oparg = (encoded_op << 8) >> 20; - int error, oldval, cval; - - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - /* XXX: linux verifies access here and returns EFAULT */ - - if (copyin(uaddr, &cval, sizeof(int)) != 0) - return -EFAULT; - - for (;;) { - int nval; - - switch (op) { - case FUTEX_OP_SET: - nval = oparg; - break; - case FUTEX_OP_ADD: - nval = cval + oparg; - break; - case FUTEX_OP_OR: - nval = cval | oparg; - break; - case FUTEX_OP_ANDN: - nval = cval & ~oparg; - break; - case FUTEX_OP_XOR: - nval = cval ^ oparg; - break; - default: - return -ENOSYS; - } - - error = ucas_int(uaddr, cval, nval, &oldval); - if (error || oldval == cval) { - break; - } - cval = oldval; - } - - if (error) - return -EFAULT; - - switch (cmp) { - case FUTEX_OP_CMP_EQ: - return (oldval == cmparg); - case FUTEX_OP_CMP_NE: - return (oldval != cmparg); - case FUTEX_OP_CMP_LT: - return (oldval < cmparg); - case FUTEX_OP_CMP_GE: - return (oldval >= cmparg); - case FUTEX_OP_CMP_LE: - return (oldval <= cmparg); - case FUTEX_OP_CMP_GT: - return (oldval > cmparg); - default: - return -ENOSYS; - } -} - -int -linux_sys_set_robust_list(struct lwp *l, - const struct linux_sys_set_robust_list_args *uap, register_t *retval) -{ - /* { - syscallarg(struct linux_robust_list_head *) head; - syscallarg(size_t) len; - } */ - struct linux_emuldata *led; - - if (SCARG(uap, len) != sizeof(struct linux_robust_list_head)) - return EINVAL; - led = 
l->l_emuldata; - led->led_robust_head = SCARG(uap, head); - *retval = 0; - return 0; -} - -int -linux_sys_get_robust_list(struct lwp *l, - const struct linux_sys_get_robust_list_args *uap, register_t *retval) -{ - /* { - syscallarg(int) pid; - syscallarg(struct linux_robust_list_head **) head; - syscallarg(size_t *) len; - } */ - struct proc *p; - struct linux_emuldata *led; - struct linux_robust_list_head *head; - size_t len; - int error = 0; - - p = l->l_proc; - if (!SCARG(uap, pid)) { - led = l->l_emuldata; - head = led->led_robust_head; + tsp = &ts; } else { - mutex_enter(p->p_lock); - l = lwp_find(p, SCARG(uap, pid)); - if (l != NULL) { - led = l->l_emuldata; - head = led->led_robust_head; - } - mutex_exit(p->p_lock); - if (l == NULL) { - return ESRCH; - } - } -#ifdef __arch64__ - if (p->p_flag & PK_32) { - uint32_t u32; - - u32 = 12; - error = copyout(&u32, SCARG(uap, len), sizeof(u32)); - if (error) - return error; - u32 = (uint32_t)(uintptr_t)head; - return copyout(&u32, SCARG(uap, head), sizeof(u32)); - } -#endif - - len = sizeof(*head); - error = copyout(&len, SCARG(uap, len), sizeof(len)); - if (error) - return error; - return copyout(&head, SCARG(uap, head), sizeof(head)); -} - -static int -handle_futex_death(void *uaddr, pid_t pid, int pi) -{ - int uval, nval, mval; - struct futex *f; - -retry: - if (copyin(uaddr, &uval, sizeof(uval))) - return EFAULT; - - if ((uval & FUTEX_TID_MASK) == pid) { - mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; - nval = atomic_cas_32(uaddr, uval, mval); - - if (nval == -1) - return EFAULT; - - if (nval != uval) - goto retry; - - if (!pi && (uval & FUTEX_WAITERS)) { - FUTEX_LOCK; - f = futex_get(uaddr, FUTEX_BITSET_MATCH_ANY); - futex_wake(f, 1, NULL, 0); - FUTEX_UNLOCK; - } + val2 = (int)(uintptr_t)SCARG(uap, timeout); } - return 0; -} - -static int -fetch_robust_entry(struct lwp *l, struct linux_robust_list **entry, - struct linux_robust_list **head, int *pi) -{ - unsigned long uentry; - -#ifdef __arch64__ - if 
(l->l_proc->p_flag & PK_32) { - uint32_t u32; - - if (copyin(head, &u32, sizeof(u32))) - return EFAULT; - uentry = (unsigned long)u32; - } else -#endif - if (copyin(head, &uentry, sizeof(uentry))) - return EFAULT; - - *entry = (void *)(uentry & ~1UL); - *pi = uentry & 1; - - return 0; -} - -/* This walks the list of robust futexes, releasing them. */ -void -release_futexes(struct lwp *l) -{ - struct linux_robust_list_head head; - struct linux_robust_list *entry, *next_entry = NULL, *pending; - unsigned int limit = 2048, pi, next_pi, pip; - struct linux_emuldata *led; - unsigned long futex_offset; - int rc; - - led = l->l_emuldata; - if (led->led_robust_head == NULL) - return; - -#ifdef __arch64__ - if (l->l_proc->p_flag & PK_32) { - uint32_t u32s[3]; - - if (copyin(led->led_robust_head, u32s, sizeof(u32s))) - return; - - head.list.next = (void *)(uintptr_t)u32s[0]; - head.futex_offset = (unsigned long)u32s[1]; - head.pending_list = (void *)(uintptr_t)u32s[2]; - } else -#endif - if (copyin(led->led_robust_head, &head, sizeof(head))) - return; - - if (fetch_robust_entry(l, &entry, &head.list.next, &pi)) - return; - -#ifdef __arch64__ - if (l->l_proc->p_flag & PK_32) { - uint32_t u32; - - if (copyin(led->led_robust_head, &u32, sizeof(u32))) - return; - - head.futex_offset = (unsigned long)u32; - futex_offset = head.futex_offset; - } else -#endif - if (copyin(&head.futex_offset, &futex_offset, sizeof(unsigned long))) - return; - - if (fetch_robust_entry(l, &pending, &head.pending_list, &pip)) - return; - - while (entry != &head.list) { - rc = fetch_robust_entry(l, &next_entry, &entry->next, &next_pi); - - if (entry != pending) - if (handle_futex_death((char *)entry + futex_offset, - l->l_lid, pi)) - return; - - if (rc) - return; - - entry = next_entry; - pi = next_pi; - - if (!--limit) - break; - - preempt_point(); - } - - if (pending) - handle_futex_death((char *)pending + futex_offset, - l->l_lid, pip); + return do_futex(SCARG(uap, uaddr), SCARG(uap, op), SCARG(uap, 
val), + tsp, SCARG(uap, uaddr2), val2, SCARG(uap, val3), retval); } diff --git a/sys/compat/linux/common/linux_futex.h b/sys/compat/linux/common/linux_futex.h deleted file mode 100644 index c672920f717c..000000000000 --- a/sys/compat/linux/common/linux_futex.h +++ /dev/null @@ -1,93 +0,0 @@ -/* $NetBSD: linux_futex.h,v 1.8 2017/04/10 15:04:32 dholland Exp $ */ - -/*- - * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by Emmanuel Dreyfus - * 4. The name of the author may not be used to endorse or promote - * products derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_FUTEX_H -#define _LINUX_FUTEX_H - -#define LINUX_FUTEX_WAIT 0 -#define LINUX_FUTEX_WAKE 1 -#define LINUX_FUTEX_FD 2 -#define LINUX_FUTEX_REQUEUE 3 -#define LINUX_FUTEX_CMP_REQUEUE 4 -#define LINUX_FUTEX_WAKE_OP 5 -#define LINUX_FUTEX_LOCK_PI 6 -#define LINUX_FUTEX_UNLOCK_PI 7 -#define LINUX_FUTEX_TRYLOCK_PI 8 -#define LINUX_FUTEX_WAIT_BITSET 9 -#define LINUX_FUTEX_WAKE_BITSET 10 -#define LINUX_FUTEX_WAIT_REQUEUE_PI 11 -#define LINUX_FUTEX_CMP_REQUEUE_PI 12 - -#define LINUX_FUTEX_PRIVATE_FLAG 128 -#define LINUX_FUTEX_CLOCK_REALTIME 256 -#define LINUX_FUTEX_CMD_MASK \ - (~(LINUX_FUTEX_PRIVATE_FLAG|LINUX_FUTEX_CLOCK_REALTIME)) - -#define FUTEX_OP_SET 0 -#define FUTEX_OP_ADD 1 -#define FUTEX_OP_OR 2 -#define FUTEX_OP_ANDN 3 -#define FUTEX_OP_XOR 4 -#define FUTEX_OP_OPARG_SHIFT 8 - -#define FUTEX_OP_CMP_EQ 0 -#define FUTEX_OP_CMP_NE 1 -#define FUTEX_OP_CMP_LT 2 -#define FUTEX_OP_CMP_LE 3 -#define FUTEX_OP_CMP_GT 4 -#define FUTEX_OP_CMP_GE 5 - -struct linux_robust_list { - struct linux_robust_list *next; -}; - -struct linux_robust_list_head { - struct linux_robust_list list; - unsigned long futex_offset; - struct linux_robust_list *pending_list; -}; - -#define FUTEX_WAITERS 0x80000000 -#define FUTEX_OWNER_DIED 0x40000000 -#define FUTEX_TID_MASK 0x3fffffff - -#define FUTEX_BITSET_MATCH_ANY 0xffffffff - -void release_futexes(struct lwp *); -struct linux_sys_futex_args; -int linux_do_futex(struct lwp *, const struct 
linux_sys_futex_args *, - struct timespec *, register_t *); -void linux_futex_init(void); -void linux_futex_fini(void); - -#endif /* !_LINUX_FUTEX_H */ diff --git a/sys/compat/linux/common/linux_mod.c b/sys/compat/linux/common/linux_mod.c index c0a3d6a8f00d..877de5ba32e1 100644 --- a/sys/compat/linux/common/linux_mod.c +++ b/sys/compat/linux/common/linux_mod.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux_mod.c,v 1.13 2020/03/21 16:28:56 pgoyette Exp $ */ +/* $NetBSD: linux_mod.c,v 1.14 2020/04/26 18:53:33 thorpej Exp $ */ /*- * Copyright (c) 2008 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_mod.c,v 1.13 2020/03/21 16:28:56 pgoyette Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_mod.c,v 1.14 2020/04/26 18:53:33 thorpej Exp $"); #ifdef _KERNEL_OPT #include "opt_execfmt.h" @@ -47,7 +47,6 @@ __KERNEL_RCSID(0, "$NetBSD: linux_mod.c,v 1.13 2020/03/21 16:28:56 pgoyette Exp #include #include -#include #include #if defined(EXEC_ELF32) && ELFSIZE == 32 @@ -163,7 +162,6 @@ compat_linux_modcmd(modcmd_t cmd, void *arg) switch (cmd) { case MODULE_CMD_INIT: - linux_futex_init(); error = exec_add(linux_execsw, __arraycount(linux_execsw)); return error; @@ -172,7 +170,6 @@ compat_linux_modcmd(modcmd_t cmd, void *arg) if (error) return error; linux_sysctl_fini(); - linux_futex_fini(); return 0; default: diff --git a/sys/compat/linux32/arch/amd64/syscalls.master b/sys/compat/linux32/arch/amd64/syscalls.master index 4480717b7aaf..5c2dcda42fbf 100644 --- a/sys/compat/linux32/arch/amd64/syscalls.master +++ b/sys/compat/linux32/arch/amd64/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.70 2019/11/09 23:44:31 jdolecek Exp $ + $NetBSD: syscalls.master,v 1.71 2020/04/26 18:53:33 thorpej Exp $ ; NetBSD i386 COMPAT_LINUX32 system call name/number "master" file. ; (See syscalls.conf to see what it is processed into.) 
@@ -527,10 +527,15 @@ 309 STD { int|linux32_sys||ppoll(netbsd32_pollfdp_t fds, u_int nfds, \ linux32_timespecp_t timeout, linux32_sigsetp_t sigset); } 310 UNIMPL unshare -311 STD { int|linux32_sys||set_robust_list( \ - linux32_robust_list_headp_t head, linux32_size_t len); } -312 STD { int|linux32_sys||get_robust_list(linux32_pid_t pid, \ - linux32_robust_list_headpp_t head, linux32_sizep_t len); } + ; + ; The netbsd32 native robust list calls have different + ; argument names / types, but they are ABI-compatible + ; with linux32. + ; +311 NOARGS { int|netbsd32||__futex_set_robust_list( \ + netbsd32_voidp head, netbsd32_size_t len); } +312 NOARGS { int|netbsd32||__futex_get_robust_list(lwpid_t lwpid, \ + netbsd32_voidp headp, netbsd32_size_tp lenp); } 313 UNIMPL splice 314 UNIMPL sync_file_range 315 UNIMPL tee diff --git a/sys/compat/linux32/common/linux32_misc.c b/sys/compat/linux32/common/linux32_misc.c index 783761ccb2cf..83a1ac943077 100644 --- a/sys/compat/linux32/common/linux32_misc.c +++ b/sys/compat/linux32/common/linux32_misc.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux32_misc.c,v 1.27 2019/08/23 13:49:12 maxv Exp $ */ +/* $NetBSD: linux32_misc.c,v 1.28 2020/04/26 18:53:33 thorpej Exp $ */ /*- * Copyright (c) 1995, 1998, 1999 The NetBSD Foundation, Inc. 
@@ -32,7 +32,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: linux32_misc.c,v 1.27 2019/08/23 13:49:12 maxv Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux32_misc.c,v 1.28 2020/04/26 18:53:33 thorpej Exp $"); #include #include @@ -43,6 +43,7 @@ __KERNEL_RCSID(0, "$NetBSD: linux32_misc.c,v 1.27 2019/08/23 13:49:12 maxv Exp $ #include #include #include +#include #include #include @@ -61,7 +62,6 @@ __KERNEL_RCSID(0, "$NetBSD: linux32_misc.c,v 1.27 2019/08/23 13:49:12 maxv Exp $ #include #include #include -#include #include extern const struct linux_mnttypes linux_fstypes[]; @@ -243,66 +243,30 @@ linux32_sys_futex(struct lwp *l, syscallarg(linux32_intp_t) uaddr2; syscallarg(int) val3; } */ - struct linux_sys_futex_args ua; struct linux32_timespec lts; - struct timespec ts = { 0, 0 }; + struct timespec ts, *tsp = NULL; + int val2 = 0; int error; - NETBSD32TOP_UAP(uaddr, int); - NETBSD32TO64_UAP(op); - NETBSD32TO64_UAP(val); - NETBSD32TOP_UAP(timeout, struct linux_timespec); - NETBSD32TOP_UAP(uaddr2, int); - NETBSD32TO64_UAP(val3); - if ((SCARG(uap, op) & ~LINUX_FUTEX_PRIVATE_FLAG) == LINUX_FUTEX_WAIT && + /* + * Linux overlays the "timeout" field and the "val2" field. + * "timeout" is only valid for FUTEX_WAIT on Linux. 
+ */ + if ((SCARG(uap, op) & FUTEX_CMD_MASK) == FUTEX_WAIT && SCARG_P32(uap, timeout) != NULL) { - if ((error = copyin((void *)SCARG_P32(uap, timeout), + if ((error = copyin(SCARG_P32(uap, timeout), &lts, sizeof(lts))) != 0) { return error; } linux32_to_native_timespec(&ts, &lts); + tsp = &ts; + } else { + val2 = (int)(uintptr_t)SCARG_P32(uap, timeout); } - return linux_do_futex(l, &ua, &ts, retval); -} -int -linux32_sys_set_robust_list(struct lwp *l, - const struct linux32_sys_set_robust_list_args *uap, register_t *retval) -{ - /* { - syscallarg(linux32_robust_list_headp_t) head; - syscallarg(linux32_size_t) len; - } */ - struct linux_sys_set_robust_list_args ua; - struct linux_emuldata *led; - - if (SCARG(uap, len) != 12) - return EINVAL; - - NETBSD32TOP_UAP(head, struct robust_list_head); - NETBSD32TOX64_UAP(len, size_t); - - led = l->l_emuldata; - led->led_robust_head = SCARG(&ua, head); - *retval = 0; - return 0; -} - -int -linux32_sys_get_robust_list(struct lwp *l, - const struct linux32_sys_get_robust_list_args *uap, register_t *retval) -{ - /* { - syscallarg(linux32_pid_t) pid; - syscallarg(linux32_robust_list_headpp_t) head; - syscallarg(linux32_sizep_t) len; - } */ - struct linux_sys_get_robust_list_args ua; - - NETBSD32TOX_UAP(pid, int); - NETBSD32TOP_UAP(head, struct robust_list_head *); - NETBSD32TOP_UAP(len, size_t *); - return linux_sys_get_robust_list(l, &ua, retval); + return do_futex(SCARG_P32(uap, uaddr), SCARG(uap, op), + SCARG(uap, val), tsp, SCARG_P32(uap, uaddr2), val2, + SCARG(uap, val3), retval); } int diff --git a/sys/compat/netbsd32/files.netbsd32 b/sys/compat/netbsd32/files.netbsd32 index 28a12303112a..d02160b0d4fa 100644 --- a/sys/compat/netbsd32/files.netbsd32 +++ b/sys/compat/netbsd32/files.netbsd32 @@ -1,4 +1,4 @@ -# $NetBSD: files.netbsd32,v 1.50 2020/03/12 15:02:29 pgoyette Exp $ +# $NetBSD: files.netbsd32,v 1.51 2020/04/26 18:53:33 thorpej Exp $ # # config file description for machine-independent netbsd32 compat code.
# included by ports that need it. @@ -19,6 +19,7 @@ file compat/netbsd32/netbsd32_event.c compat_netbsd32 file compat/netbsd32/netbsd32_execve.c compat_netbsd32 file compat/netbsd32/netbsd32_fd.c compat_netbsd32 file compat/netbsd32/netbsd32_fs.c compat_netbsd32 +file compat/netbsd32/netbsd32_futex.c compat_netbsd32 file compat/netbsd32/netbsd32_kern_proc.c compat_netbsd32 file compat/netbsd32/netbsd32_ioctl.c compat_netbsd32 file compat/netbsd32/netbsd32_ipc.c compat_netbsd32 diff --git a/sys/compat/netbsd32/netbsd32_futex.c b/sys/compat/netbsd32/netbsd32_futex.c new file mode 100644 index 000000000000..1921f569dcf6 --- /dev/null +++ b/sys/compat/netbsd32/netbsd32_futex.c @@ -0,0 +1,134 @@ +/* $NetBSD: netbsd32_futex.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $ */ + +/*- + * Copyright (c) 2019 The NetBSD Foundation. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Taylor R. Campbell and Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: netbsd32_futex.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $"); + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Syscalls conversion */ + +int +netbsd32___futex(struct lwp *l, const struct netbsd32___futex_args *uap, + register_t *retval) +{ + /* { + syscallarg(netbsd32_intp) uaddr; + syscallarg(int) op; + syscallarg(int) val; + syscallarg(netbsd32_timespecp_t) timeout; + syscallarg(netbsd32_intp) uaddr2; + syscallarg(int) val2; + syscallarg(int) val3; + } */ + struct netbsd32_timespec ts32; + struct timespec ts, *tsp; + int error; + + /* + * Copy in the timeout argument, if specified.
+ */ + if (SCARG_P32(uap, timeout)) { + error = copyin(SCARG_P32(uap, timeout), &ts32, sizeof(ts32)); + if (error) + return error; + netbsd32_to_timespec(&ts32, &ts); + tsp = &ts; + } else { + tsp = NULL; + } + + return do_futex(SCARG_P32(uap, uaddr), SCARG(uap, op), + SCARG(uap, val), tsp, SCARG_P32(uap, uaddr2), SCARG(uap, val2), + SCARG(uap, val3), retval); +} + +int +netbsd32___futex_set_robust_list(struct lwp *l, + const struct netbsd32___futex_set_robust_list_args *uap, register_t *retval) +{ + /* { + syscallarg(netbsd32_voidp) head; + syscallarg(netbsd32_size_t) len; + } */ + void *head = SCARG_P32(uap, head); + + if (SCARG(uap, len) != _FUTEX_ROBUST_HEAD_SIZE32) + return EINVAL; + if ((uintptr_t)head % sizeof(uint32_t)) + return EINVAL; + + l->l_robust_head = (uintptr_t)head; + + return 0; +} + +int +netbsd32___futex_get_robust_list(struct lwp *l, + const struct netbsd32___futex_get_robust_list_args *uap, register_t *retval) +{ + /* { + syscallarg(lwpid_t) lwpid; + syscallarg(netbsd32_voidp) headp; + syscallarg(netbsd32_size_tp) lenp; + } */ + void *head; + const netbsd32_size_t len = _FUTEX_ROBUST_HEAD_SIZE32; + netbsd32_voidp head32; + int error; + + error = futex_robust_head_lookup(l, SCARG(uap, lwpid), &head); + if (error) + return error; + + head32.i32 = (uintptr_t)head; + if (NETBSD32PTR64(head32) != head) + return EFAULT; + + /* Copy out the head pointer and the head structure length. */ + /* (N.B.: "headp" is actually a "void **". 
*/ + error = copyout(&head32, SCARG_P32(uap, headp), sizeof(head32)); + if (__predict_true(error == 0)) { + error = copyout(&len, SCARG_P32(uap, lenp), sizeof(len)); + } + + return error; +} diff --git a/sys/compat/netbsd32/syscalls.master b/sys/compat/netbsd32/syscalls.master index 4b621a67d63c..cf4264328bba 100644 --- a/sys/compat/netbsd32/syscalls.master +++ b/sys/compat/netbsd32/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.135 2020/04/22 21:22:21 thorpej Exp $ + $NetBSD: syscalls.master,v 1.136 2020/04/26 18:53:33 thorpej Exp $ ; from: NetBSD: syscalls.master,v 1.81 1998/07/05 08:49:50 jonathan Exp ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 @@ -1168,3 +1168,10 @@ 486 STD { int|netbsd32|90|fhstatvfs1(netbsd32_voidp fhp, \ netbsd32_size_t fh_size, netbsd32_statvfsp_t buf, \ int flags); } +487 STD { int|netbsd32||__futex(netbsd32_intp uaddr, int op, \ + int val, const netbsd32_timespecp_t timeout, \ + netbsd32_intp uaddr2, int val2, int val3); } +488 STD { int|netbsd32||__futex_set_robust_list( \ + netbsd32_voidp head, netbsd32_size_t len); } +489 STD { int|netbsd32||__futex_get_robust_list(lwpid_t lwpid, \ + netbsd32_voidp headp, netbsd32_size_tp lenp); } diff --git a/sys/kern/files.kern b/sys/kern/files.kern index 2d6229d84194..f189701abc07 100644 --- a/sys/kern/files.kern +++ b/sys/kern/files.kern @@ -1,4 +1,4 @@ -# $NetBSD: files.kern,v 1.45 2020/04/22 09:18:42 rin Exp $ +# $NetBSD: files.kern,v 1.46 2020/04/26 18:53:33 thorpej Exp $ # # kernel sources @@ -154,6 +154,7 @@ file kern/subr_workqueue.c kern file kern/subr_xcall.c kern file kern/sys_aio.c aio file kern/sys_descrip.c kern +file kern/sys_futex.c kern file kern/sys_generic.c kern file kern/sys_module.c kern file kern/sys_mqueue.c mqueue diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 0268f7b977ab..1af340eb4b73 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -1,4 +1,4 @@ -/* $NetBSD: init_main.c,v 1.522 2020/02/24 20:47:47 jdolecek Exp $ */ +/* 
$NetBSD: init_main.c,v 1.523 2020/04/26 18:53:33 thorpej Exp $ */ /*- * Copyright (c) 2008, 2009, 2019 The NetBSD Foundation, Inc. @@ -97,7 +97,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.522 2020/02/24 20:47:47 jdolecek Exp $"); +__KERNEL_RCSID(0, "$NetBSD: init_main.c,v 1.523 2020/04/26 18:53:33 thorpej Exp $"); #include "opt_ddb.h" #include "opt_inet.h" @@ -180,6 +180,7 @@ extern void *_binary_splash_image_end; #include #include #include +#include #ifdef IPSEC #include #endif @@ -550,6 +551,8 @@ main(void) ipi_sysinit(); + futex_sys_init(); + /* Now timer is working. Enable preemption. */ kpreempt_enable(); diff --git a/sys/kern/kern_lwp.c b/sys/kern/kern_lwp.c index 46a18e7c97d6..277c059b50a8 100644 --- a/sys/kern/kern_lwp.c +++ b/sys/kern/kern_lwp.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_lwp.c,v 1.235 2020/04/24 03:22:06 thorpej Exp $ */ +/* $NetBSD: kern_lwp.c,v 1.236 2020/04/26 18:53:33 thorpej Exp $ */ /*- * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2019, 2020 @@ -223,7 +223,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.235 2020/04/24 03:22:06 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.236 2020/04/26 18:53:33 thorpej Exp $"); #include "opt_ddb.h" #include "opt_lockdebug.h" @@ -258,6 +258,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_lwp.c,v 1.235 2020/04/24 03:22:06 thorpej Exp $ #include #include #include +#include #include #include @@ -2093,6 +2094,7 @@ lwp_thread_cleanup(struct lwp *l) KASSERT(l == curlwp); const lwpid_t tid = l->l_lid; + KASSERT((tid & FUTEX_TID_MASK) == tid); KASSERT(mutex_owned(l->l_proc->p_lock)); /* @@ -2103,6 +2105,14 @@ lwp_thread_cleanup(struct lwp *l) proc_hide_lwpid(tid); mutex_exit(l->l_proc->p_lock); + + /* + * If the LWP has robust futexes, release them all + * now. 
+ */ + if (__predict_false(l->l_robust_head != 0)) { + futex_release_all_lwp(l, tid); + } } #if defined(DDB) diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 11763c347abe..5dc566919c4a 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -1,4 +1,4 @@ -/* $NetBSD: kern_proc.c,v 1.249 2020/04/26 15:49:10 thorpej Exp $ */ +/* $NetBSD: kern_proc.c,v 1.250 2020/04/26 18:53:33 thorpej Exp $ */ /*- * Copyright (c) 1999, 2006, 2007, 2008, 2020 The NetBSD Foundation, Inc. @@ -62,7 +62,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.249 2020/04/26 15:49:10 thorpej Exp $"); +__KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.250 2020/04/26 18:53:33 thorpej Exp $"); #ifdef _KERNEL_OPT #include "opt_kstack.h" @@ -106,6 +106,7 @@ __KERNEL_RCSID(0, "$NetBSD: kern_proc.c,v 1.249 2020/04/26 15:49:10 thorpej Exp #include #include #include +#include #include #include @@ -889,6 +890,9 @@ expand_pid_table(void) new_pt = kmem_alloc(tsz, KM_SLEEP); new_pt_mask = pt_size * 2 - 1; + /* XXX For now. The practical limit is much lower anyway. */ + KASSERT(new_pt_mask <= FUTEX_TID_MASK); + rw_enter(&pid_table_lock, RW_WRITER); if (pt_size != pid_tbl_mask + 1) { /* Another process beat us to it... */ @@ -1039,6 +1043,9 @@ proc_alloc_pid_slot(struct proc *p, uintptr_t slot) pid &= pid_tbl_mask; next_free_pt = nxt & pid_tbl_mask; + /* XXX For now. The practical limit is much lower anyway. */ + KASSERT(pid <= FUTEX_TID_MASK); + /* Grab table slot */ pt->pt_slot = slot; diff --git a/sys/kern/sys_futex.c b/sys/kern/sys_futex.c new file mode 100644 index 000000000000..d2915df6d549 --- /dev/null +++ b/sys/kern/sys_futex.c @@ -0,0 +1,1977 @@ +/* $NetBSD: sys_futex.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $ */ + +/*- + * Copyright (c) 2018, 2019, 2020 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Taylor R. Campbell and Jason R. Thorpe.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD: sys_futex.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $"); + +/* + * Futexes + * + * The futex system call coordinates notifying threads waiting for + * changes on a 32-bit word of memory. The word can be managed by + * CPU atomic operations in userland, without system calls, as long + * as there is no contention. + * + * The simplest use case demonstrating the utility is: + * + * // 32-bit word of memory shared among threads or + * // processes in userland. lock & 1 means owned; + * // lock & 2 means there are waiters waiting. + * volatile int lock = 0; + * + * int v; + * + * // Acquire a lock. 
+ * do { + * v = lock; + * if (v & 1) { + * // Lock is held. Set a bit to say that + * // there are waiters, and wait for lock + * // to change to anything other than v; + * // then retry. + * if (atomic_cas_uint(&lock, v, v | 2) != v) + * continue; + * futex(FUTEX_WAIT, &lock, v | 2, NULL, NULL, 0); + * continue; + * } + * } while (atomic_cas_uint(&lock, v, v | 1) != v); + * membar_enter(); + * + * ... + * + * // Release the lock. Optimistically assume there are + * // no waiters first until demonstrated otherwise. + * membar_exit(); + * if (atomic_cas_uint(&lock, 1, 0) != 1) { + * // There may be waiters. + * v = atomic_swap_uint(&lock, 0); + * // If there are still waiters, wake one. + * if (v & 2) + * futex(FUTEX_WAKE, &lock, 1, NULL, NULL, 0); + * } + * + * The goal is to avoid the futex system call unless there is + * contention; then if there is contention, to guarantee no missed + * wakeups. + * + * For a simple implementation, futex(FUTEX_WAIT) could queue + * itself to be woken, double-check the lock word, and then sleep; + * spurious wakeups are generally a fact of life, so any + * FUTEX_WAKE could just wake every FUTEX_WAIT in the system. + * + * If this were all there is to it, we could then increase + * parallelism by refining the approximation: partition the + * waiters into buckets by hashing the lock addresses to reduce + * the incidence of spurious wakeups. But this is not all. + * + * The futex(FUTEX_CMP_REQUEUE, &lock, n, &lock2, m, val) + * operation not only wakes n waiters on lock if lock == val, but + * also _transfers_ m additional waiters to lock2. Unless wakeups + * on lock2 also trigger wakeups on lock, we cannot move waiters + * to lock2 if they merely share the same hash as waiters on lock. + * Thus, we can't approximately distribute waiters into queues by + * a hash function; we must distinguish futex queues exactly by + * lock address. + * + * For now, we use a global red/black tree to index futexes.
This + * should be replaced by a lockless radix tree with a thread to + * free entries no longer in use once all lookups on all CPUs have + * completed. + * + * Specifically, we maintain two maps: + * + * futex_tab.va[vmspace, va] for private futexes + * futex_tab.oa[uvm_voaddr] for shared futexes + * + * This implementation does not support priority inheritance. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +/* + * Lock order: + * + * futex_tab.lock + * futex::fx_qlock ordered by kva of struct futex + * -> futex_wait::fw_lock only one at a time + * futex_wait::fw_lock only one at a time + * -> futex::fx_abortlock only one at a time + */ + +/* + * union futex_key + * + * A futex is addressed either by a vmspace+va (private) or by + * a uvm_voaddr (shared). + */ +union futex_key { + struct { + struct vmspace *vmspace; + vaddr_t va; + } fk_private; + struct uvm_voaddr fk_shared; +}; + +/* + * struct futex + * + * Kernel state for a futex located at a particular address in a + * particular virtual address space. + * + * N.B. fx_refcnt is an unsigned long because we need to be able + * to operate on it atomically on all systems while at the same + * time rendering practically impossible the chance of it reaching + * its max value. In practice, we're limited by the number of LWPs + * that can be present on the system at any given time, and the + * assumption is that limit will be good enough on a 32-bit platform. + * See futex_wake() for why overflow needs to be avoided. + */ +struct futex { + union futex_key fx_key; + unsigned long fx_refcnt; + bool fx_shared; + bool fx_on_tree; + struct rb_node fx_node; + + kmutex_t fx_qlock; + TAILQ_HEAD(, futex_wait) fx_queue; + + kmutex_t fx_abortlock; + LIST_HEAD(, futex_wait) fx_abortlist; + kcondvar_t fx_abortcv; +}; + +/* + * struct futex_wait + * + * State for a thread to wait on a futex. Threads wait on fw_cv + * for fw_bitset to be set to zero. 
The thread may transition to + * a different futex queue at any time under the futex's lock. + */ +struct futex_wait { + kmutex_t fw_lock; + kcondvar_t fw_cv; + struct futex *fw_futex; + TAILQ_ENTRY(futex_wait) fw_entry; /* queue lock */ + LIST_ENTRY(futex_wait) fw_abort; /* queue abortlock */ + int fw_bitset; +}; + +/* + * futex_tab + * + * Global trees of futexes by vmspace/va and VM object address. + * + * XXX This obviously doesn't scale in parallel. We could use a + * pserialize-safe data structure, but there may be a high cost to + * frequent deletion since we don't cache futexes after we're done + * with them. We could use hashed locks. But for now, just make + * sure userland can't DoS the serial performance, by using a + * balanced binary tree for lookup. + * + * XXX We could use a per-process tree for the table indexed by + * virtual address to reduce contention between processes. + */ +static struct { + kmutex_t lock; + struct rb_tree va; + struct rb_tree oa; +} futex_tab __cacheline_aligned; + +static int +compare_futex_key(void *cookie, const void *n, const void *k) +{ + const struct futex *fa = n; + const union futex_key *fka = &fa->fx_key; + const union futex_key *fkb = k; + + if ((uintptr_t)fka->fk_private.vmspace < + (uintptr_t)fkb->fk_private.vmspace) + return -1; + if ((uintptr_t)fka->fk_private.vmspace > + (uintptr_t)fkb->fk_private.vmspace) + return +1; + if (fka->fk_private.va < fkb->fk_private.va) + return -1; + if (fka->fk_private.va > fkb->fk_private.va) + return +1; + return 0; +} + +static int +compare_futex(void *cookie, const void *na, const void *nb) +{ + const struct futex *fa = na; + const struct futex *fb = nb; + + return compare_futex_key(cookie, fa, &fb->fx_key); +} + +static const rb_tree_ops_t futex_rb_ops = { + .rbto_compare_nodes = compare_futex, + .rbto_compare_key = compare_futex_key, + .rbto_node_offset = offsetof(struct futex, fx_node), +}; + +static int +compare_futex_shared_key(void *cookie, const void *n, const void *k)
+{ + const struct futex *fa = n; + const union futex_key *fka = &fa->fx_key; + const union futex_key *fkb = k; + + return uvm_voaddr_compare(&fka->fk_shared, &fkb->fk_shared); +} + +static int +compare_futex_shared(void *cookie, const void *na, const void *nb) +{ + const struct futex *fa = na; + const struct futex *fb = nb; + + return compare_futex_shared_key(cookie, fa, &fb->fx_key); +} + +static const rb_tree_ops_t futex_shared_rb_ops = { + .rbto_compare_nodes = compare_futex_shared, + .rbto_compare_key = compare_futex_shared_key, + .rbto_node_offset = offsetof(struct futex, fx_node), +}; + +static void futex_wait_dequeue(struct futex_wait *, struct futex *); + +/* + * futex_load(uaddr, kaddr) + * + * Perform a single atomic load to read *uaddr, and return the + * result in *kaddr. Return 0 on success, EFAULT if uaddr is not + * mapped. + */ +static inline int +futex_load(int *uaddr, int *kaddr) +{ + return ufetch_int((u_int *)uaddr, (u_int *)kaddr); +} + +/* + * futex_test(uaddr, expected) + * + * True if *uaddr == expected. False if *uaddr != expected, or if + * uaddr is not mapped. + */ +static bool +futex_test(int *uaddr, int expected) +{ + int val; + int error; + + error = futex_load(uaddr, &val); + if (error) + return false; + return val == expected; +} + +/* + * futex_sys_init() + * + * Initialize the futex subsystem. + */ +void +futex_sys_init(void) +{ + + mutex_init(&futex_tab.lock, MUTEX_DEFAULT, IPL_NONE); + rb_tree_init(&futex_tab.va, &futex_rb_ops); + rb_tree_init(&futex_tab.oa, &futex_shared_rb_ops); +} + +/* + * futex_sys_fini() + * + * Finalize the futex subsystem. + */ +void +futex_sys_fini(void) +{ + + KASSERT(RB_TREE_MIN(&futex_tab.oa) == NULL); + KASSERT(RB_TREE_MIN(&futex_tab.va) == NULL); + mutex_destroy(&futex_tab.lock); +} + +/* + * futex_queue_init(f) + * + * Initialize the futex queue. Caller must call futex_queue_fini + * when done. + * + * Never sleeps. 
+ */ +static void +futex_queue_init(struct futex *f) +{ + + mutex_init(&f->fx_qlock, MUTEX_DEFAULT, IPL_NONE); + mutex_init(&f->fx_abortlock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&f->fx_abortcv, "fqabort"); + LIST_INIT(&f->fx_abortlist); + TAILQ_INIT(&f->fx_queue); +} + +/* + * futex_queue_drain(f) + * + * Wait for any aborting waiters in f; then empty the queue of + * any stragglers and wake them. Caller must guarantee no new + * references to f. + * + * May sleep. + */ +static void +futex_queue_drain(struct futex *f) +{ + struct futex_wait *fw, *fw_next; + + mutex_enter(&f->fx_abortlock); + while (!LIST_EMPTY(&f->fx_abortlist)) + cv_wait(&f->fx_abortcv, &f->fx_abortlock); + mutex_exit(&f->fx_abortlock); + + mutex_enter(&f->fx_qlock); + TAILQ_FOREACH_SAFE(fw, &f->fx_queue, fw_entry, fw_next) { + mutex_enter(&fw->fw_lock); + futex_wait_dequeue(fw, f); + cv_broadcast(&fw->fw_cv); + mutex_exit(&fw->fw_lock); + } + mutex_exit(&f->fx_qlock); +} + +/* + * futex_queue_fini(fq) + * + * Finalize the futex queue initialized by futex_queue_init. Queue + * must be empty. Caller must not use f again until a subsequent + * futex_queue_init. + */ +static void +futex_queue_fini(struct futex *f) +{ + + KASSERT(TAILQ_EMPTY(&f->fx_queue)); + KASSERT(LIST_EMPTY(&f->fx_abortlist)); + mutex_destroy(&f->fx_qlock); + mutex_destroy(&f->fx_abortlock); + cv_destroy(&f->fx_abortcv); +} + +/* + * futex_key_init(key, vm, va, shared) + * + * Initialize a futex key for lookup, etc. + */ +static int +futex_key_init(union futex_key *fk, struct vmspace *vm, vaddr_t va, bool shared) +{ + int error = 0; + + if (__predict_false(shared)) { + if (!uvm_voaddr_acquire(&vm->vm_map, va, &fk->fk_shared)) + error = EFAULT; + } else { + fk->fk_private.vmspace = vm; + fk->fk_private.va = va; + } + + return error; +} + +/* + * futex_key_fini(key, shared) + * + * Release a futex key. 
+ */ +static void +futex_key_fini(union futex_key *fk, bool shared) +{ + if (__predict_false(shared)) + uvm_voaddr_release(&fk->fk_shared); + memset(fk, 0, sizeof(*fk)); +} + +/* + * futex_create(fk, shared) + * + * Create a futex. Initial reference count is 1, representing the + * caller. Returns NULL on failure. Always takes ownership of the + * key, either transferring it to the newly-created futex, or releasing + * the key if creation fails. + * + * Never sleeps for memory, but may sleep to acquire a lock. + */ +static struct futex * +futex_create(union futex_key *fk, bool shared) +{ + struct futex *f; + + f = kmem_alloc(sizeof(*f), KM_NOSLEEP); + if (f == NULL) { + futex_key_fini(fk, shared); + return NULL; + } + f->fx_key = *fk; + f->fx_refcnt = 1; + f->fx_shared = shared; + f->fx_on_tree = false; + futex_queue_init(f); + + return f; +} + +/* + * futex_destroy(f) + * + * Destroy a futex created with futex_create. Reference count + * must be zero. + * + * May sleep. + */ +static void +futex_destroy(struct futex *f) +{ + + ASSERT_SLEEPABLE(); + + KASSERT(atomic_load_relaxed(&f->fx_refcnt) == 0); + KASSERT(!f->fx_on_tree); + + /* Drain and destroy the private queue. */ + futex_queue_drain(f); + futex_queue_fini(f); + + futex_key_fini(&f->fx_key, f->fx_shared); + + kmem_free(f, sizeof(*f)); +} + +/* + * futex_hold(f) + * + * Attempt to acquire a reference to f. Return 0 on success, + * ENFILE on too many references. + * + * Never sleeps. + */ +static int +futex_hold(struct futex *f) +{ + unsigned long refcnt; + + do { + refcnt = atomic_load_relaxed(&f->fx_refcnt); + if (refcnt == ULONG_MAX) + return ENFILE; + } while (atomic_cas_ulong(&f->fx_refcnt, refcnt, refcnt + 1) != refcnt); + + return 0; +} + +/* + * futex_rele(f) + * + * Release a reference to f acquired with futex_create or + * futex_hold. + * + * May sleep to free f. 
+ */ +static void +futex_rele(struct futex *f) +{ + unsigned long refcnt; + + ASSERT_SLEEPABLE(); + + do { + refcnt = atomic_load_relaxed(&f->fx_refcnt); + if (refcnt == 1) + goto trylast; + } while (atomic_cas_ulong(&f->fx_refcnt, refcnt, refcnt - 1) != refcnt); + return; + +trylast: + mutex_enter(&futex_tab.lock); + if (atomic_dec_ulong_nv(&f->fx_refcnt) == 0) { + if (f->fx_on_tree) { + if (__predict_false(f->fx_shared)) + rb_tree_remove_node(&futex_tab.oa, f); + else + rb_tree_remove_node(&futex_tab.va, f); + f->fx_on_tree = false; + } + } else { + /* References remain -- don't destroy it. */ + f = NULL; + } + mutex_exit(&futex_tab.lock); + if (f != NULL) + futex_destroy(f); +} + +/* + * futex_rele_not_last(f) + * + * Release a reference to f acquired with futex_create or + * futex_hold. + * + * This version asserts that we are not dropping the last + * reference to f. + */ +static void +futex_rele_not_last(struct futex *f) +{ + unsigned long refcnt; + + do { + refcnt = atomic_load_relaxed(&f->fx_refcnt); + KASSERT(refcnt > 1); + } while (atomic_cas_ulong(&f->fx_refcnt, refcnt, refcnt - 1) != refcnt); +} + +/* + * futex_lookup_by_key(key, shared, &f) + * + * Try to find an existing futex va reference in the specified key + * On success, return 0, set f to found futex or to NULL if not found, + * and increment f's reference count if found. + * + * Return ENFILE if reference count too high. + * + * Internal lookup routine shared by futex_lookup() and + * futex_get(). 
+ */ +static int +futex_lookup_by_key(union futex_key *fk, bool shared, struct futex **fp) +{ + struct futex *f; + int error = 0; + + mutex_enter(&futex_tab.lock); + if (__predict_false(shared)) { + f = rb_tree_find_node(&futex_tab.oa, fk); + } else { + f = rb_tree_find_node(&futex_tab.va, fk); + } + if (f) { + error = futex_hold(f); + if (error) + f = NULL; + } + *fp = f; + mutex_exit(&futex_tab.lock); + + return error; +} + +/* + * futex_insert(f, fp) + * + * Try to insert the futex f into the tree by va. If there + * already is a futex for its va, acquire a reference to it, and + * store it in *fp; otherwise store f in *fp. + * + * Return 0 on success, ENFILE if there already is a futex but its + * reference count is too high. + */ +static int +futex_insert(struct futex *f, struct futex **fp) +{ + struct futex *f0; + int error; + + KASSERT(atomic_load_relaxed(&f->fx_refcnt) != 0); + KASSERT(!f->fx_on_tree); + + mutex_enter(&futex_tab.lock); + if (__predict_false(f->fx_shared)) + f0 = rb_tree_insert_node(&futex_tab.oa, f); + else + f0 = rb_tree_insert_node(&futex_tab.va, f); + if (f0 == f) { + f->fx_on_tree = true; + error = 0; + } else { + KASSERT(atomic_load_relaxed(&f0->fx_refcnt) != 0); + KASSERT(f0->fx_on_tree); + error = futex_hold(f0); + if (error) + goto out; + } + *fp = f0; +out: mutex_exit(&futex_tab.lock); + + return error; +} + +/* + * futex_lookup(uaddr, shared, &f) + * + * Find a futex at the userland pointer uaddr in the current + * process's VM space. On success, return the futex in f and + * increment its reference count. + * + * Caller must call futex_put when done. + */ +static int +futex_lookup(int *uaddr, bool shared, struct futex **fp) +{ + union futex_key fk; + struct vmspace *vm = curproc->p_vmspace; + vaddr_t va = (vaddr_t)uaddr; + int error; + + /* + * Reject unaligned user pointers so we don't cross page + * boundaries and so atomics will work. 
+ */ + if ((va & 3) != 0) + return EINVAL; + + CTASSERT((PAGE_SIZE & 3) == 0); + + /* Look it up. */ + error = futex_key_init(&fk, vm, va, shared); + if (error) + return error; + + error = futex_lookup_by_key(&fk, shared, fp); + futex_key_fini(&fk, shared); + if (error) + return error; + + KASSERT(*fp == NULL || (*fp)->fx_shared == shared); + KASSERT(*fp == NULL || atomic_load_relaxed(&(*fp)->fx_refcnt) != 0); + + /* + * Success! (Caller must still check whether we found + * anything, but nothing went _wrong_ like trying to use + * unmapped memory.) + */ + KASSERT(error == 0); + + return error; +} + +/* + * futex_get(uaddr, shared, &f) + * + * Find or create a futex at the userland pointer uaddr in the + * current process's VM space. On success, return the futex in f + * and increment its reference count. + * + * Caller must call futex_put when done. + */ +static int +futex_get(int *uaddr, bool shared, struct futex **fp) +{ + union futex_key fk; + struct vmspace *vm = curproc->p_vmspace; + struct futex *f = NULL; + vaddr_t va = (vaddr_t)uaddr; + int error; + + /* + * Reject unaligned user pointers so we don't cross page + * boundaries and so atomics will work. + */ + if ((va & 3) != 0) + return EINVAL; + + CTASSERT((PAGE_SIZE & 3) == 0); + + error = futex_key_init(&fk, vm, va, shared); + if (error) + return error; + + /* + * Optimistically assume there already is one, and try to find + * it. + */ + error = futex_lookup_by_key(&fk, shared, fp); + if (error || *fp != NULL) { + /* + * We either found one, or there was an error. + * In either case, we are done with the key. + */ + futex_key_fini(&fk, shared); + goto out; + } + + /* + * Create a futex recoard. This tranfers ownership of the key + * in all cases. + */ + f = futex_create(&fk, shared); + if (f == NULL) { + error = ENOMEM; + goto out; + } + + /* + * Insert our new futex, or use existing if someone else beat + * us to it. 
+ */ + error = futex_insert(f, fp); + if (error) + goto out; + if (*fp == f) + f = NULL; /* don't release on exit */ + + /* Success! */ + KASSERT(error == 0); + +out: if (f != NULL) + futex_rele(f); + KASSERT(error || *fp != NULL); + KASSERT(error || atomic_load_relaxed(&(*fp)->fx_refcnt) != 0); + return error; +} + +/* + * futex_put(f) + * + * Release a futex acquired with futex_get or futex_lookup. + */ +static void +futex_put(struct futex *f) +{ + + futex_rele(f); +} + +/* + * futex_wait_init(fw, bitset) + * + * Initialize a record for a thread to wait on a futex matching + * the specified bit set. Should be passed to futex_wait_enqueue + * before futex_wait, and should be passed to futex_wait_fini when + * done. + */ +static void +futex_wait_init(struct futex_wait *fw, int bitset) +{ + + mutex_init(&fw->fw_lock, MUTEX_DEFAULT, IPL_NONE); + cv_init(&fw->fw_cv, "futex"); + fw->fw_futex = NULL; + fw->fw_bitset = bitset; +} + +/* + * futex_wait_fini(fw) + * + * Finalize a record for a futex waiter. Must not be on any + * futex's queue. + */ +static void +futex_wait_fini(struct futex_wait *fw) +{ + + cv_destroy(&fw->fw_cv); + mutex_destroy(&fw->fw_lock); +} + +/* + * futex_wait_enqueue(fw, f) + * + * Put fw on the futex queue. Must be done before futex_wait. + * Caller must hold fw's lock and f's lock, and fw must not be on + * any existing futex's waiter list. + */ +static void +futex_wait_enqueue(struct futex_wait *fw, struct futex *f) +{ + + KASSERT(mutex_owned(&f->fx_qlock)); + KASSERT(mutex_owned(&fw->fw_lock)); + KASSERT(fw->fw_futex == NULL); + + fw->fw_futex = f; + TAILQ_INSERT_TAIL(&f->fx_queue, fw, fw_entry); +} + +/* + * futex_wait_dequeue(fw, f) + * + * Remove fw from the futex queue. Precludes subsequent + * futex_wait until a futex_wait_enqueue. Caller must hold fw's + * lock and f's lock, and fw must be on f. 
+ */ +static void +futex_wait_dequeue(struct futex_wait *fw, struct futex *f) +{ + + KASSERT(mutex_owned(&f->fx_qlock)); + KASSERT(mutex_owned(&fw->fw_lock)); + KASSERT(fw->fw_futex == f); + + TAILQ_REMOVE(&f->fx_queue, fw, fw_entry); + fw->fw_futex = NULL; +} + +/* + * futex_wait_abort(fw) + * + * Caller is no longer waiting for fw. Remove it from any queue + * if it was on one. + */ +static void +futex_wait_abort(struct futex_wait *fw) +{ + struct futex *f; + + /* Acquire fw_lock so that the content of fw won't change. */ + mutex_enter(&fw->fw_lock); + + /* + * Grab the futex queue. It can't go away as long as we hold + * fw_lock. However, we can't take the queue lock because + * that's a lock order reversal. + */ + f = fw->fw_futex; + + /* Put us on the abort list so that fq won't go away. */ + mutex_enter(&f->fx_abortlock); + LIST_INSERT_HEAD(&f->fx_abortlist, fw, fw_abort); + mutex_exit(&f->fx_abortlock); + + /* f is now stable, so we can release fw_lock. */ + mutex_exit(&fw->fw_lock); + + /* Now we can remove fw under the queue lock. */ + mutex_enter(&f->fx_qlock); + TAILQ_REMOVE(&f->fx_queue, fw, fw_entry); + mutex_exit(&f->fx_qlock); + + /* + * Finally, remove us from the abort list and notify anyone + * waiting for the abort to complete if we were the last to go. + */ + mutex_enter(&f->fx_abortlock); + LIST_REMOVE(fw, fw_abort); + if (LIST_EMPTY(&f->fx_abortlist)) + cv_broadcast(&f->fx_abortcv); + mutex_exit(&f->fx_abortlock); +} + +/* + * futex_wait(fw, deadline, clkid) + * + * fw must be a waiter on a futex's queue. Wait until deadline on + * the clock clkid, or forever if deadline is NULL, for a futex + * wakeup. Return 0 on explicit wakeup or destruction of futex, + * ETIMEDOUT on timeout, EINTR/ERESTART on signal. + */ +static int +futex_wait(struct futex_wait *fw, const struct timespec *deadline, + clockid_t clkid) +{ + int error = 0; + + /* Test and wait under the wait lock. 
*/ + mutex_enter(&fw->fw_lock); + while (fw->fw_bitset && fw->fw_futex != NULL) { + /* Not done yet. Wait. */ + if (deadline) { + struct timespec ts; + + /* Check our watch. */ + error = clock_gettime1(clkid, &ts); + if (error) + break; + + /* If we're past the deadline, ETIMEDOUT. */ + if (timespeccmp(deadline, &ts, <=)) { + error = ETIMEDOUT; + break; + } + + /* Count how much time is left. */ + timespecsub(deadline, &ts, &ts); + + /* Wait for that much time, allowing signals. */ + error = cv_timedwait_sig(&fw->fw_cv, &fw->fw_lock, + tstohz(&ts)); + } else { + /* Wait indefinitely, allowing signals. */ + error = cv_wait_sig(&fw->fw_cv, &fw->fw_lock); + } + if (error) { + /* Convert EWOULDBLOCK to ETIMEDOUT. */ + if (error == EWOULDBLOCK) + error = ETIMEDOUT; + break; + } + } + mutex_exit(&fw->fw_lock); + + return error; +} + +/* + * futex_wake(f, nwake, f2, nrequeue, bitset) + * + * Wake up to nwake waiters on f matching bitset; then, if f2 is + * provided, move up to nrequeue remaining waiters on f matching + * bitset to f2. Return the number of waiters actually woken. + * Caller must hold the locks of f and f2, if provided. + */ +static unsigned +futex_wake(struct futex *f, unsigned nwake, struct futex *f2, + unsigned nrequeue, int bitset) +{ + struct futex_wait *fw, *fw_next; + unsigned nwoken = 0; + int hold_error; + + KASSERT(mutex_owned(&f->fx_qlock)); + KASSERT(f2 == NULL || mutex_owned(&f2->fx_qlock)); + + /* Wake up to nwake waiters, and count the number woken. */ + TAILQ_FOREACH_SAFE(fw, &f->fx_queue, fw_entry, fw_next) { + if ((fw->fw_bitset & bitset) == 0) + continue; + if (nwake-- > 0) { + mutex_enter(&fw->fw_lock); + futex_wait_dequeue(fw, f); + fw->fw_bitset = 0; + cv_broadcast(&fw->fw_cv); + mutex_exit(&fw->fw_lock); + nwoken++; + /* + * Drop the futex reference on behalf of the + * waiter. We assert this is not the last + * reference on the futex (our caller should + * also have one). 
+ */ + futex_rele_not_last(f); + } else { + break; + } + } + + if (f2) { + /* Move up to nrequeue waiters from f's queue to f2's queue. */ + TAILQ_FOREACH_SAFE(fw, &f->fx_queue, fw_entry, fw_next) { + if ((fw->fw_bitset & bitset) == 0) + continue; + if (nrequeue-- > 0) { + mutex_enter(&fw->fw_lock); + futex_wait_dequeue(fw, f); + futex_wait_enqueue(fw, f2); + mutex_exit(&fw->fw_lock); + /* + * Transfer the reference from f to f2. + * As above, we assert that we are not + * dropping the last reference to f here. + * + * XXX futex_hold() could theoretically + * XXX fail here. + */ + futex_rele_not_last(f); + hold_error = futex_hold(f2); + KASSERT(hold_error == 0); + } else { + break; + } + } + } else { + KASSERT(nrequeue == 0); + } + + /* Return the number of waiters woken. */ + return nwoken; +} + +/* + * futex_queue_lock(f) + * + * Acquire the queue lock of f. Pair with futex_queue_unlock. Do + * not use if caller needs to acquire two locks; use + * futex_queue_lock2 instead. + */ +static void +futex_queue_lock(struct futex *f) +{ + mutex_enter(&f->fx_qlock); +} + +/* + * futex_queue_unlock(f) + * + * Release the queue lock of f. + */ +static void +futex_queue_unlock(struct futex *f) +{ + mutex_exit(&f->fx_qlock); +} + +/* + * futex_queue_lock2(f, f2) + * + * Acquire the queue locks of both f and f2, which may be null, or + * which may have the same underlying queue. If they are + * distinct, an arbitrary total order is chosen on the locks. + * + * Callers should only ever acquire multiple queue locks + * simultaneously using futex_queue_lock2. + */ +static void +futex_queue_lock2(struct futex *f, struct futex *f2) +{ + + /* + * If both are null, do nothing; if one is null and the other + * is not, lock the other and be done with it. 
+ */ + if (f == NULL && f2 == NULL) { + return; + } else if (f == NULL) { + mutex_enter(&f2->fx_qlock); + return; + } else if (f2 == NULL) { + mutex_enter(&f->fx_qlock); + return; + } + + /* If both futexes are the same, acquire only one. */ + if (f == f2) { + mutex_enter(&f->fx_qlock); + return; + } + + /* Otherwise, use the ordering on the kva of the futex pointer. */ + if ((uintptr_t)f < (uintptr_t)f2) { + mutex_enter(&f->fx_qlock); + mutex_enter(&f2->fx_qlock); + } else { + mutex_enter(&f2->fx_qlock); + mutex_enter(&f->fx_qlock); + } +} + +/* + * futex_queue_unlock2(f, f2) + * + * Release the queue locks of both f and f2, which may be null, or + * which may have the same underlying queue. + */ +static void +futex_queue_unlock2(struct futex *f, struct futex *f2) +{ + + /* + * If both are null, do nothing; if one is null and the other + * is not, unlock the other and be done with it. + */ + if (f == NULL && f2 == NULL) { + return; + } else if (f == NULL) { + mutex_exit(&f2->fx_qlock); + return; + } else if (f2 == NULL) { + mutex_exit(&f->fx_qlock); + return; + } + + /* If both futexes are the same, release only one. */ + if (f == f2) { + mutex_exit(&f->fx_qlock); + return; + } + + /* Otherwise, use the ordering on the kva of the futex pointer. */ + if ((uintptr_t)f < (uintptr_t)f2) { + mutex_exit(&f2->fx_qlock); + mutex_exit(&f->fx_qlock); + } else { + mutex_exit(&f->fx_qlock); + mutex_exit(&f2->fx_qlock); + } +} + +/* + * futex_func_wait(uaddr, val, val3, timeout, clkid, clkflags, retval) + * + * Implement futex(FUTEX_WAIT). + */ +static int +futex_func_wait(bool shared, int *uaddr, int val, int val3, + const struct timespec *timeout, clockid_t clkid, int clkflags, + register_t *retval) +{ + struct futex *f; + struct futex_wait wait, *fw = &wait; + struct timespec ts; + const struct timespec *deadline; + int error; + + /* Optimistically test before anything else. 
*/ + if (!futex_test(uaddr, val)) + return EAGAIN; + + /* Determine a deadline on the specified clock. */ + if (timeout == NULL || (clkflags & TIMER_ABSTIME) == TIMER_ABSTIME) { + deadline = timeout; + } else { + error = clock_gettime1(clkid, &ts); + if (error) + return error; + timespecadd(&ts, timeout, &ts); + deadline = &ts; + } + + /* Get the futex, creating it if necessary. */ + error = futex_get(uaddr, shared, &f); + if (error) + return error; + KASSERT(f); + + /* Get ready to wait. */ + futex_wait_init(fw, val3); + + /* + * Under the queue lock, check the value again: if it has + * already changed, EAGAIN; otherwise enqueue the waiter. + * Since FUTEX_WAKE will use the same lock and be done after + * modifying the value, the order in which we check and enqueue + * is immaterial. + */ + futex_queue_lock(f); + if (!futex_test(uaddr, val)) { + futex_queue_unlock(f); + error = EAGAIN; + goto out; + } + mutex_enter(&fw->fw_lock); + futex_wait_enqueue(fw, f); + mutex_exit(&fw->fw_lock); + futex_queue_unlock(f); + + /* + * We cannot drop our reference to the futex here, because + * we might be enqueued on a different one when we are awakened. + * The references will be managed on our behalf in the requeue + * and wake cases. + */ + f = NULL; + + /* Wait. */ + error = futex_wait(fw, deadline, clkid); + if (error) { + futex_wait_abort(fw); + goto out; + } + + /* Return 0 on success, error on failure. */ + *retval = 0; + +out: if (f != NULL) + futex_put(f); + futex_wait_fini(fw); + return error; +} + +/* + * futex_func_wake(uaddr, val, val3, retval) + * + * Implement futex(FUTEX_WAKE) and futex(FUTEX_WAKE_BITSET). + */ +static int +futex_func_wake(bool shared, int *uaddr, int val, int val3, register_t *retval) +{ + struct futex *f; + unsigned int nwoken = 0; + int error = 0; + + /* Reject negative number of wakeups. */ + if (val < 0) { + error = EINVAL; + goto out; + } + + /* Look up the futex, if any. 
*/ + error = futex_lookup(uaddr, shared, &f); + if (error) + goto out; + + /* If there's no futex, there are no waiters to wake. */ + if (f == NULL) + goto out; + + /* + * Under f's queue lock, wake the waiters and remember the + * number woken. + */ + futex_queue_lock(f); + nwoken = futex_wake(f, val, NULL, 0, val3); + futex_queue_unlock(f); + + /* Release the futex. */ + futex_put(f); + +out: + /* Return the number of waiters woken. */ + *retval = nwoken; + + /* Success! */ + return error; +} + +/* + * futex_func_requeue(op, uaddr, val, uaddr2, val2, val3, retval) + * + * Implement futex(FUTEX_REQUEUE) and futex(FUTEX_CMP_REQUEUE). + */ +static int +futex_func_requeue(bool shared, int op, int *uaddr, int val, int *uaddr2, + int val2, int val3, register_t *retval) +{ + struct futex *f = NULL, *f2 = NULL; + unsigned nwoken = 0; /* default to zero woken on early return */ + int error; + + /* Reject negative number of wakeups or requeues. */ + if (val < 0 || val2 < 0) { + error = EINVAL; + goto out; + } + + /* Look up the source futex, if any. */ + error = futex_lookup(uaddr, shared, &f); + if (error) + goto out; + + /* If there is none, nothing to do. */ + if (f == NULL) + goto out; + + /* + * We may need to create the destination futex because it's + * entirely possible it does not currently have any waiters. + */ + error = futex_get(uaddr2, shared, &f2); + if (error) + goto out; + + /* + * Under the futexes' queue locks, check the value; if + * unchanged from val3, wake the waiters. + */ + futex_queue_lock2(f, f2); + if (op == FUTEX_CMP_REQUEUE && !futex_test(uaddr, val3)) { + error = EAGAIN; + } else { + error = 0; + nwoken = futex_wake(f, val, f2, val2, FUTEX_BITSET_MATCH_ANY); + } + futex_queue_unlock2(f, f2); + +out: + /* Return the number of waiters woken. */ + *retval = nwoken; + + /* Release the futexes if we got them. 
*/ + if (f2) + futex_put(f2); + if (f) + futex_put(f); + return error; +} + +/* + * futex_validate_op_cmp(val3) + * + * Validate an op/cmp argument for FUTEX_WAKE_OP. + */ +static int +futex_validate_op_cmp(int val3) +{ + int op = __SHIFTOUT(val3, FUTEX_OP_OP_MASK); + int cmp = __SHIFTOUT(val3, FUTEX_OP_CMP_MASK); + + if (op & FUTEX_OP_OPARG_SHIFT) { + int oparg = __SHIFTOUT(val3, FUTEX_OP_OPARG_MASK); + if (oparg < 0) + return EINVAL; + if (oparg >= 32) + return EINVAL; + op &= ~FUTEX_OP_OPARG_SHIFT; + } + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + break; + default: + return EINVAL; + } + + switch (cmp) { + case FUTEX_OP_CMP_EQ: + case FUTEX_OP_CMP_NE: + case FUTEX_OP_CMP_LT: + case FUTEX_OP_CMP_LE: + case FUTEX_OP_CMP_GT: + case FUTEX_OP_CMP_GE: + break; + default: + return EINVAL; + } + + return 0; +} + +/* + * futex_compute_op(oldval, val3) + * + * Apply a FUTEX_WAIT_OP operation to oldval. + */ +static int +futex_compute_op(int oldval, int val3) +{ + int op = __SHIFTOUT(val3, FUTEX_OP_OP_MASK); + int oparg = __SHIFTOUT(val3, FUTEX_OP_OPARG_MASK); + + if (op & FUTEX_OP_OPARG_SHIFT) { + KASSERT(oparg >= 0); + KASSERT(oparg < 32); + oparg = 1u << oparg; + op &= ~FUTEX_OP_OPARG_SHIFT; + } + + switch (op) { + case FUTEX_OP_SET: + return oparg; + + case FUTEX_OP_ADD: + /* + * Avoid signed arithmetic overflow by doing + * arithmetic unsigned and converting back to signed + * at the end. + */ + return (int)((unsigned)oldval + (unsigned)oparg); + + case FUTEX_OP_OR: + return oldval | oparg; + + case FUTEX_OP_ANDN: + return oldval & ~oparg; + + case FUTEX_OP_XOR: + return oldval ^ oparg; + + default: + panic("invalid futex op"); + } +} + +/* + * futex_compute_cmp(oldval, val3) + * + * Apply a FUTEX_WAIT_OP comparison to oldval. 
+ */ +static bool +futex_compute_cmp(int oldval, int val3) +{ + int cmp = __SHIFTOUT(val3, FUTEX_OP_CMP_MASK); + int cmparg = __SHIFTOUT(val3, FUTEX_OP_CMPARG_MASK); + + switch (cmp) { + case FUTEX_OP_CMP_EQ: + return (oldval == cmparg); + + case FUTEX_OP_CMP_NE: + return (oldval != cmparg); + + case FUTEX_OP_CMP_LT: + return (oldval < cmparg); + + case FUTEX_OP_CMP_LE: + return (oldval <= cmparg); + + case FUTEX_OP_CMP_GT: + return (oldval > cmparg); + + case FUTEX_OP_CMP_GE: + return (oldval >= cmparg); + + default: + panic("invalid futex cmp operation"); + } +} + +/* + * futex_func_wake_op(uaddr, val, uaddr2, val2, val3, retval) + * + * Implement futex(FUTEX_WAKE_OP). + */ +static int +futex_func_wake_op(bool shared, int *uaddr, int val, int *uaddr2, int val2, + int val3, register_t *retval) +{ + struct futex *f = NULL, *f2 = NULL; + int oldval, newval, actual; + unsigned nwoken = 0; + int error; + + /* Reject negative number of wakeups. */ + if (val < 0 || val2 < 0) { + error = EINVAL; + goto out; + } + + /* Reject invalid operations before we start doing things. */ + if ((error = futex_validate_op_cmp(val3)) != 0) + goto out; + + /* Look up the first futex, if any. */ + error = futex_lookup(uaddr, shared, &f); + if (error) + goto out; + + /* Look up the second futex, if any. */ + error = futex_lookup(uaddr2, shared, &f2); + if (error) + goto out; + + /* + * Under the queue locks: + * + * 1. Read/modify/write: *uaddr2 op= oparg. + * 2. Unconditionally wake uaddr. + * 3. Conditionally wake uaddr2, if it previously matched val2. + */ + futex_queue_lock2(f, f2); + do { + error = futex_load(uaddr2, &oldval); + if (error) + goto out_unlock; + newval = futex_compute_op(oldval, val3); + error = ucas_int(uaddr2, oldval, newval, &actual); + if (error) + goto out_unlock; + } while (actual != oldval); + nwoken = (f ? 
futex_wake(f, val, NULL, 0, FUTEX_BITSET_MATCH_ANY) : 0); + if (f2 && futex_compute_cmp(oldval, val3)) + nwoken += futex_wake(f2, val2, NULL, 0, + FUTEX_BITSET_MATCH_ANY); + + /* Success! */ + error = 0; +out_unlock: + futex_queue_unlock2(f, f2); + +out: + /* Return the number of waiters woken. */ + *retval = nwoken; + + /* Release the futexes, if we got them. */ + if (f2) + futex_put(f2); + if (f) + futex_put(f); + return error; +} + +/* + * do_futex(uaddr, op, val, timeout, uaddr2, val2, val3) + * + * Implement the futex system call with all the parameters + * parsed out. + */ +int +do_futex(int *uaddr, int op, int val, const struct timespec *timeout, + int *uaddr2, int val2, int val3, register_t *retval) +{ + const bool shared = (op & FUTEX_PRIVATE_FLAG) ? false : true; + const clockid_t clkid = (op & FUTEX_CLOCK_REALTIME) ? CLOCK_REALTIME + : CLOCK_MONOTONIC; + + op &= FUTEX_CMD_MASK; + + switch (op) { + case FUTEX_WAIT: + return futex_func_wait(shared, uaddr, val, + FUTEX_BITSET_MATCH_ANY, timeout, clkid, TIMER_RELTIME, + retval); + + case FUTEX_WAKE: + val3 = FUTEX_BITSET_MATCH_ANY; + /* FALLTHROUGH */ + case FUTEX_WAKE_BITSET: + return futex_func_wake(shared, uaddr, val, val3, retval); + + case FUTEX_REQUEUE: + case FUTEX_CMP_REQUEUE: + return futex_func_requeue(shared, op, uaddr, val, uaddr2, + val2, val3, retval); + + case FUTEX_WAIT_BITSET: + return futex_func_wait(shared, uaddr, val, val3, timeout, + clkid, TIMER_ABSTIME, retval); + + case FUTEX_WAKE_OP: + return futex_func_wake_op(shared, uaddr, val, uaddr2, val2, + val3, retval); + + case FUTEX_FD: + default: + return ENOSYS; + } +} + +/* + * sys___futex(l, uap, retval) + * + * __futex(2) system call: generic futex operations. 
+ */ +int +sys___futex(struct lwp *l, const struct sys___futex_args *uap, + register_t *retval) +{ + /* { + syscallarg(int *) uaddr; + syscallarg(int) op; + syscallarg(int) val; + syscallarg(const struct timespec *) timeout; + syscallarg(int *) uaddr2; + syscallarg(int) val2; + syscallarg(int) val3; + } */ + struct timespec ts, *tsp; + int error; + + /* + * Copy in the timeout argument, if specified. + */ + if (SCARG(uap, timeout)) { + error = copyin(SCARG(uap, timeout), &ts, sizeof(ts)); + if (error) + return error; + tsp = &ts; + } else { + tsp = NULL; + } + + return do_futex(SCARG(uap, uaddr), SCARG(uap, op), SCARG(uap, val), + tsp, SCARG(uap, uaddr2), SCARG(uap, val2), SCARG(uap, val3), + retval); +} + +/* + * sys___futex_set_robust_list(l, uap, retval) + * + * __futex_set_robust_list(2) system call for robust futexes. + */ +int +sys___futex_set_robust_list(struct lwp *l, + const struct sys___futex_set_robust_list_args *uap, register_t *retval) +{ + /* { + syscallarg(void *) head; + syscallarg(size_t) len; + } */ + void *head = SCARG(uap, head); + + if (SCARG(uap, len) != _FUTEX_ROBUST_HEAD_SIZE) + return EINVAL; + if ((uintptr_t)head % sizeof(u_long)) + return EINVAL; + + l->l_robust_head = (uintptr_t)head; + + return 0; +} + +/* + * sys___futex_get_robust_list(l, uap, retval) + * + * __futex_get_robust_list(2) system call for robust futexes. + */ +int +sys___futex_get_robust_list(struct lwp *l, + const struct sys___futex_get_robust_list_args *uap, register_t *retval) +{ + /* { + syscallarg(lwpid_t) lwpid; + syscallarg(void **) headp; + syscallarg(size_t *) lenp; + } */ + void *head; + const size_t len = _FUTEX_ROBUST_HEAD_SIZE; + int error; + + error = futex_robust_head_lookup(l, SCARG(uap, lwpid), &head); + if (error) + return error; + + /* Copy out the head pointer and the head structure length. 
*/ + error = copyout(&head, SCARG(uap, headp), sizeof(head)); + if (__predict_true(error == 0)) { + error = copyout(&len, SCARG(uap, lenp), sizeof(len)); + } + + return error; +} + +/* + * release_futex(uptr, tid, is_pi, is_pending) + * + * Try to release the robust futex at uptr in the current process + * on lwp exit. If anything goes wrong, silently fail. It is the + * userland program's obligation to arrange correct behaviour. + * + * is_pending selects the special case below in which a wake is + * issued without modifying the futex word. is_pi is not consulted + * yet (see the XXX notes about PI handling). + */ +static void +release_futex(uintptr_t const uptr, lwpid_t const tid, bool const is_pi, + bool const is_pending) +{ + int *uaddr; + struct futex *f; + int oldval, newval, actual; + int error; + + /* If it's misaligned, tough. */ + if (__predict_false(uptr & 3)) + return; + uaddr = (int *)uptr; + + error = futex_load(uaddr, &oldval); + if (__predict_false(error)) + return; + + /* + * There are two race conditions we need to handle here: + * + * 1. User space cleared the futex word but died before + * being able to issue the wakeup. No wakeups will + * ever be issued, oops! + * + * 2. Awakened waiter died before being able to acquire + * the futex in user space. Any other waiters are + * now stuck, oops! + * + * In both of these cases, the futex word will be 0 (because + * it's updated before the wake is issued). The best we can + * do is detect this situation if it's the pending futex and + * issue a wake without modifying the futex word. + * + * XXX eventual PI handling? + */ + if (__predict_false(is_pending && (oldval & ~FUTEX_WAITERS) == 0)) { + register_t retval; + (void) futex_func_wake(/*shared*/true, uaddr, 1, + FUTEX_BITSET_MATCH_ANY, &retval); + return; + } + + /* Optimistically test whether we need to do anything at all. */ + if ((oldval & FUTEX_TID_MASK) != tid) + return; + + /* + * We need to handle the case where this thread owned the futex, + * but it was uncontended. In this case, there won't be any + * kernel state to look up.
All we can do is mark the futex + * as a zombie to be mopped up the next time another thread + * attempts to acquire it. + * + * N.B. It's important to set FUTEX_OWNER_DIED in this loop, + * even if waiters appear while we are doing so. This is + * because FUTEX_WAITERS is set by user space + * before calling __futex() to wait, and the futex needs + * to be marked as a zombie when the new waiter gets into + * the kernel. + */ + if ((oldval & FUTEX_WAITERS) == 0) { + do { + error = futex_load(uaddr, &oldval); + if (error) + return; + if ((oldval & FUTEX_TID_MASK) != tid) + return; + newval = oldval | FUTEX_OWNER_DIED; + error = ucas_int(uaddr, oldval, newval, &actual); + if (error) + return; + } while (actual != oldval); + + /* + * If there is still no indication of waiters, then there is + * no more work for us to do. + */ + if ((oldval & FUTEX_WAITERS) == 0) + return; + } + + /* + * Look for a shared futex since we have no positive indication + * it is private. If we can't, tough. + */ + error = futex_lookup(uaddr, /*shared*/true, &f); + if (error) + return; + + /* + * If there's no kernel state for this futex, there's nothing to + * release. + */ + if (f == NULL) + return; + + /* Work under the futex queue lock. */ + futex_queue_lock(f); + + /* + * Fetch the word: if the tid doesn't match ours, skip; + * otherwise, set the owner-died bit, atomically. + */ + do { + error = futex_load(uaddr, &oldval); + if (error) + goto out; + if ((oldval & FUTEX_TID_MASK) != tid) + goto out; + newval = oldval | FUTEX_OWNER_DIED; + error = ucas_int(uaddr, oldval, newval, &actual); + if (error) + goto out; + } while (actual != oldval); + + /* + * If there may be waiters, try to wake one. If anything goes + * wrong, tough. + * + * XXX eventual PI handling? + */ + if (oldval & FUTEX_WAITERS) + (void)futex_wake(f, 1, NULL, 0, FUTEX_BITSET_MATCH_ANY); + + /* Unlock the queue and release the futex.
*/ +out: futex_queue_unlock(f); + futex_put(f); +} + +/* + * futex_robust_head_lookup(l, lwpid, headp) + * + * Helper function to look up a robust head by LWP ID. A lwpid + * of 0 selects l itself; otherwise the LWP is looked up in l's + * process under p_lock, and ESRCH is returned if it is not found. + * On success the head is stored in *headp and 0 is returned. + */ +int +futex_robust_head_lookup(struct lwp *l, lwpid_t lwpid, void **headp) +{ + struct proc *p = l->l_proc; + + /* Find the other lwp, if requested; otherwise use our robust head. */ + if (lwpid) { + mutex_enter(p->p_lock); + l = lwp_find(p, lwpid); + if (l == NULL) { + mutex_exit(p->p_lock); + return ESRCH; + } + *headp = (void *)l->l_robust_head; + mutex_exit(p->p_lock); + } else { + *headp = (void *)l->l_robust_head; + } + return 0; +} + +/* + * futex_fetch_robust_head(uaddr, rhead) + * + * Helper routine to fetch the futex robust list head that + * handles 32-bit binaries running on 64-bit kernels. Fills + * _FUTEX_ROBUST_HEAD_NWORDS words of rhead and returns the + * copyin() error, if any. + */ +static int +futex_fetch_robust_head(uintptr_t uaddr, u_long *rhead) +{ +#ifdef _LP64 + if (curproc->p_flag & PK_32) { + uint32_t rhead32[_FUTEX_ROBUST_HEAD_NWORDS]; + int error; + + error = copyin((void *)uaddr, rhead32, sizeof(rhead32)); + if (__predict_true(error == 0)) { + for (int i = 0; i < _FUTEX_ROBUST_HEAD_NWORDS; i++) { + if (i == _FUTEX_ROBUST_HEAD_OFFSET) { + /* + * Make sure the offset is sign- + * extended. + */ + rhead[i] = (int32_t)rhead32[i]; + } else { + rhead[i] = rhead32[i]; + } + } + } + return error; + } +#endif /* _LP64 */ + + return copyin((void *)uaddr, rhead, + sizeof(*rhead) * _FUTEX_ROBUST_HEAD_NWORDS); +} + +/* + * futex_decode_robust_word(word, entry, is_pi) + * + * Decode a robust futex list word into the entry and entry + * properties: *is_pi is set from the _FUTEX_ROBUST_ENTRY_PI bit + * and *entry is the word with that bit cleared. + */ +static inline void +futex_decode_robust_word(uintptr_t const word, uintptr_t * const entry, + bool * const is_pi) +{ + *is_pi = (word & _FUTEX_ROBUST_ENTRY_PI) ? true : false; + *entry = word & ~_FUTEX_ROBUST_ENTRY_PI; +} + +/* + * futex_fetch_robust_entry(uaddr) + * + * Helper routine to fetch and decode a robust futex entry + * that handles 32-bit binaries running on 64-bit kernels.
+ */ +static int +futex_fetch_robust_entry(uintptr_t const uaddr, uintptr_t * const valp, + bool * const is_pi) +{ + uintptr_t val = 0; + int error = 0; + +#ifdef _LP64 + if (curproc->p_flag & PK_32) { + uint32_t val32; + + error = ufetch_32((uint32_t *)uaddr, &val32); + if (__predict_true(error == 0)) + val = val32; + } else +#endif /* _LP64 */ + error = ufetch_long((u_long *)uaddr, (u_long *)&val); + if (__predict_false(error)) + return error; + + futex_decode_robust_word(val, valp, is_pi); + return 0; +} + +/* + * futex_release_all_lwp(l, tid) + * + * Release all l's robust futexes. If anything looks funny in + * the process, give up -- it's userland's responsibility to dot + * the i's and cross the t's. + */ +void +futex_release_all_lwp(struct lwp * const l, lwpid_t const tid) +{ + u_long rhead[_FUTEX_ROBUST_HEAD_NWORDS]; + int limit = 1000000; + int error; + + /* If there's no robust list there's nothing to do. */ + if (l->l_robust_head == 0) + return; + + /* Read the final snapshot of the robust list head. */ + error = futex_fetch_robust_head(l->l_robust_head, rhead); + if (error) { + printf("WARNING: pid %jd (%s) lwp %jd tid %jd:" + " unmapped robust futex list head\n", + (uintmax_t)l->l_proc->p_pid, l->l_proc->p_comm, + (uintmax_t)l->l_lid, (uintmax_t)tid); + return; + } + + const long offset = (long)rhead[_FUTEX_ROBUST_HEAD_OFFSET]; + + uintptr_t next, pending; + bool is_pi, pending_is_pi; + + futex_decode_robust_word(rhead[_FUTEX_ROBUST_HEAD_LIST], + &next, &is_pi); + futex_decode_robust_word(rhead[_FUTEX_ROBUST_HEAD_PENDING], + &pending, &pending_is_pi); + + /* + * Walk down the list of locked futexes and release them, up + * to one million of them before we give up. + */ + + while (next != l->l_robust_head && limit-- > 0) { + /* pending handled below. 
*/ + if (next != pending) + release_futex(next + offset, tid, is_pi, false); + error = futex_fetch_robust_entry(next, &next, &is_pi); + if (error) + break; + preempt_point(); + } + if (limit <= 0) { + printf("WARNING: pid %jd (%s) lwp %jd tid %jd:" + " exhausted robust futex limit\n", + (uintmax_t)l->l_proc->p_pid, l->l_proc->p_comm, + (uintmax_t)l->l_lid, (uintmax_t)tid); + } + + /* If there's a pending futex, it may need to be released too. */ + if (pending != 0) { + release_futex(pending + offset, tid, pending_is_pi, true); + } +} diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 6f782a03ea8c..6b62112ce2cb 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.302 2020/04/22 21:22:21 thorpej Exp $ + $NetBSD: syscalls.master,v 1.303 2020/04/26 18:53:33 thorpej Exp $ ; @(#)syscalls.master 8.2 (Berkeley) 1/13/94 @@ -1015,3 +1015,10 @@ struct statvfs *buf, int flags); } 486 STD RUMP { int|sys|90|fhstatvfs1(const void *fhp, \ size_t fh_size, struct statvfs *buf, int flags); } +487 STD { int|sys||__futex(int *uaddr, int op, int val, \ + const struct timespec *timeout, \ + int *uaddr2, int val2, int val3); } +488 STD { int|sys||__futex_set_robust_list(void *head, \ + size_t len); } +489 STD { int|sys||__futex_get_robust_list(lwpid_t lwpid, \ + void **headp, size_t *lenp); } diff --git a/sys/sys/Makefile b/sys/sys/Makefile index baa0ffc5d783..af9e5bf3a94b 100644 --- a/sys/sys/Makefile +++ b/sys/sys/Makefile @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.172 2020/03/22 14:27:33 ad Exp $ +# $NetBSD: Makefile,v 1.173 2020/04/26 18:53:33 thorpej Exp $ .include @@ -21,7 +21,7 @@ INCS= acct.h agpio.h aio.h ansi.h aout_mids.h ataio.h atomic.h \ endian.h envsys.h errno.h evcnt.h event.h exec.h exec_aout.h \ exec_coff.h exec_ecoff.h exec_elf.h exec_script.h extattr.h extent.h \ fcntl.h fd_set.h fdio.h featuretest.h file.h filedesc.h filio.h \ - flashio.h float_ieee754.h fstypes.h gcq.h gmon.h gpio.h 
hash.h \ + flashio.h float_ieee754.h fstypes.h futex.h gcq.h gmon.h gpio.h hash.h \ idtype.h ieee754.h intr.h intrio.h inttypes.h ioccom.h ioctl.h \ ioctl_compat.h iostat.h ipc.h ipmi.h \ joystick.h \ diff --git a/sys/sys/futex.h b/sys/sys/futex.h new file mode 100644 index 000000000000..bb7c4cb2c806 --- /dev/null +++ b/sys/sys/futex.h @@ -0,0 +1,186 @@ +/* $NetBSD: futex.h,v 1.1 2020/04/26 18:53:33 thorpej Exp $ */ + +/*- + * Copyright (c) 2018, 2019 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Taylor R. Campbell and Jason R. Thorpe. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/*- + * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Emmanuel Dreyfus + * 4. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE THE AUTHOR AND CONTRIBUTORS ``AS IS'' + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_FUTEX_H_ +#define _SYS_FUTEX_H_ + +/* + * Definitions for the __futex(2) synchronization primitive. + * + * These definitions are intended to be ABI-compatible with the + * Linux futex(2) system call. 
+ */ + +#include + +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_FD 2 +#define FUTEX_REQUEUE 3 +#define FUTEX_CMP_REQUEUE 4 +#define FUTEX_WAKE_OP 5 +#define FUTEX_LOCK_PI 6 +#define FUTEX_UNLOCK_PI 7 +#define FUTEX_TRYLOCK_PI 8 +#define FUTEX_WAIT_BITSET 9 +#define FUTEX_WAKE_BITSET 10 +#define FUTEX_WAIT_REQUEUE_PI 11 +#define FUTEX_CMP_REQUEUE_PI 12 + +#define FUTEX_PRIVATE_FLAG __BIT(7) +#define FUTEX_CLOCK_REALTIME __BIT(8) + +#define FUTEX_CMD_MASK \ + (~(FUTEX_PRIVATE_FLAG|FUTEX_CLOCK_REALTIME)) + +#define FUTEX_OP_OP_MASK __BITS(28,31) +#define FUTEX_OP_CMP_MASK __BITS(24,27) +#define FUTEX_OP_OPARG_MASK __BITS(12,23) +#define FUTEX_OP_CMPARG_MASK __BITS(0,11) + +#define FUTEX_OP(op, oparg, cmp, cmparg) \ + (__SHIFTIN(op, FUTEX_OP_OP_MASK) |\ + __SHIFTIN(oparg, FUTEX_OP_OPARG_MASK) |\ + __SHIFTIN(cmp, FUTEX_OP_CMP_MASK) |\ + __SHIFTIN(cmparg, FUTEX_OP_CMPARG_MASK)) + +#define FUTEX_OP_SET 0 +#define FUTEX_OP_ADD 1 +#define FUTEX_OP_OR 2 +#define FUTEX_OP_ANDN 3 +#define FUTEX_OP_XOR 4 +#define FUTEX_OP_OPARG_SHIFT 8 + +#define FUTEX_OP_CMP_EQ 0 +#define FUTEX_OP_CMP_NE 1 +#define FUTEX_OP_CMP_LT 2 +#define FUTEX_OP_CMP_LE 3 +#define FUTEX_OP_CMP_GT 4 +#define FUTEX_OP_CMP_GE 5 + +/* + * FUTEX_SYNCOBJ_0 and FUTEX_SYNCOBJ_1 are extensions to the Linux + * futex API that are reserved for individual consumers of futexes + * to define information specific to that synchronization object. + * Note that as a result there is a system-wide upper limit of + * 268,435,455 threads (as opposed to 1,073,741,823).
+ */ +#define FUTEX_WAITERS ((int)__BIT(31)) +#define FUTEX_OWNER_DIED ((int)__BIT(30)) +#define FUTEX_SYNCOBJ_1 ((int)__BIT(29)) +#define FUTEX_SYNCOBJ_0 ((int)__BIT(28)) +#define FUTEX_TID_MASK ((int)__BITS(0,27)) + +#define FUTEX_BITSET_MATCH_ANY ((int)__BITS(0,31)) + +/* + * The robust futex ABI consists of an array of 3 longwords, the address + * of which is registered with the kernel on a per-thread basis: + * + * 0: A pointer to a singly-linked list of "lock entries". If the + * list is empty, this points back to the list itself. + * + * 1: An offset from the address of the "lock entry" to the 32-bit futex + * word associated with that lock entry (may be negative). + * + * 2: A "pending" pointer, for locks that are in the process of being + * acquired or released. + * + * PI futexes are handled slightly differently. User-space indicates + * an entry is for a PI futex by setting the least-significant bit. + */ +#define _FUTEX_ROBUST_HEAD_LIST 0 +#define _FUTEX_ROBUST_HEAD_OFFSET 1 +#define _FUTEX_ROBUST_HEAD_PENDING 2 +#define _FUTEX_ROBUST_HEAD_NWORDS 3 +#define _FUTEX_ROBUST_HEAD_SIZE (_FUTEX_ROBUST_HEAD_NWORDS * \ + sizeof(u_long)) +#ifdef _LP64 +#define _FUTEX_ROBUST_HEAD_SIZE32 (_FUTEX_ROBUST_HEAD_NWORDS * \ + sizeof(uint32_t)) +#endif /* _LP64 */ +#define _FUTEX_ROBUST_ENTRY_PI __BIT(0) + +#ifdef __LIBC_FUTEX_PRIVATE +struct futex_robust_list { + struct futex_robust_list *next; +}; + +struct futex_robust_list_head { + struct futex_robust_list list; + long futex_offset; + struct futex_robust_list *pending_list; +}; +#endif /* __LIBC_FUTEX_PRIVATE */ + +#ifdef _KERNEL +struct lwp; + +int futex_robust_head_lookup(struct lwp *, lwpid_t, void **); +void futex_release_all_lwp(struct lwp *, lwpid_t); +int do_futex(int *, int, int, const struct timespec *, int *, int, + int, register_t *); +void futex_sys_init(void); +void futex_sys_fini(void); +#endif /* _KERNEL */ + +#endif /* !
_SYS_FUTEX_H_ */ diff --git a/sys/sys/lwp.h b/sys/sys/lwp.h index 1794438d014b..fe4a6d9fc803 100644 --- a/sys/sys/lwp.h +++ b/sys/sys/lwp.h @@ -1,4 +1,4 @@ -/* $NetBSD: lwp.h,v 1.207 2020/04/24 03:22:06 thorpej Exp $ */ +/* $NetBSD: lwp.h,v 1.208 2020/04/26 18:53:33 thorpej Exp $ */ /* * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020 @@ -136,7 +136,7 @@ struct lwp { bool l_vforkwaiting; /* a: vfork() waiting */ /* User-space synchronization. */ - uintptr_t l___rsvd0; /* reserved for future use */ + uintptr_t l_robust_head; /* !: list of robust futexes */ uint32_t l___rsvd1; /* reserved for future use */ #if PCU_UNIT_COUNT > 0 diff --git a/tests/lib/libc/sys/Makefile b/tests/lib/libc/sys/Makefile index e72e8ee64920..2b686e7e70ba 100644 --- a/tests/lib/libc/sys/Makefile +++ b/tests/lib/libc/sys/Makefile @@ -1,4 +1,4 @@ -# $NetBSD: Makefile,v 1.62 2020/04/18 17:44:53 christos Exp $ +# $NetBSD: Makefile,v 1.63 2020/04/26 18:53:33 thorpej Exp $ MKMAN= no @@ -18,6 +18,8 @@ TESTS_C+= t_connect TESTS_C+= t_dup TESTS_C+= t_fork TESTS_C+= t_fsync +TESTS_C+= t_futex_ops +TESTS_C+= t_futex_robust TESTS_C+= t_getcontext TESTS_C+= t_getgroups TESTS_C+= t_getitimer @@ -104,6 +106,9 @@ TESTS_C+= t_posix_fadvise LDADD.t_posix_fadvise+= ${LIBRUMPBASE} .endif +CPPFLAGS.t_futex_ops.c += -I${.CURDIR}/../../../../lib +CPPFLAGS.t_futex_robust.c += -I${.CURDIR}/../../../../lib + CPPFLAGS.t_lwp_create.c += -D_KERNTYPES CPPFLAGS.t_ptrace_wait.c += -D_KERNTYPES -D__TEST_FENV CPPFLAGS.t_ptrace_wait3.c += -D_KERNTYPES -D__TEST_FENV diff --git a/tests/lib/libc/sys/t_futex_ops.c b/tests/lib/libc/sys/t_futex_ops.c new file mode 100644 index 000000000000..99874b4ba3ee --- /dev/null +++ b/tests/lib/libc/sys/t_futex_ops.c @@ -0,0 +1,1294 @@ +/* $NetBSD: t_futex_ops.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $ */ + +/*- + * Copyright (c) 2019, 2020 The NetBSD Foundation, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__COPYRIGHT("@(#) Copyright (c) 2019, 2020\ + The NetBSD Foundation, inc. 
All rights reserved."); +__RCSID("$NetBSD: t_futex_ops.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define LOAD(x) (*(volatile int *)(x)) +#define STORE(x, y) *(volatile int *)(x) = (y) + +#if 0 +#define DPRINTF(x) printf x +#else +#define DPRINTF(x) __nothing +#endif + +#define STACK_SIZE 65536 + +static volatile int futex_word; +static volatile int futex_word1; + +static volatile unsigned int nlwps_running; + +struct lwp_data { + ucontext_t context; + void (*func)(void *); + void *stack_base; + lwpid_t lwpid; + pid_t child; + lwpid_t threadid; + int wait_op; + int op_flags; + int bitset; + volatile int *futex_ptr; + volatile int *error_ptr; + int block_val; + + void (*exit_func)(void); + + int futex_error; +}; + +#define WAITER_LWP0 0 +#define WAITER_LWP1 1 +#define WAITER_LWP2 2 +#define WAITER_LWP3 3 +#define WAITER_LWP4 4 +#define WAITER_LWP5 5 +#define NLWPS 6 + +struct lwp_data lwp_data[NLWPS]; + +static const char *bs_path = "t_futex_ops_backing_store"; +static int bs_fd = -1; +static int *bs_addr = MAP_FAILED; +static void *bs_source_buffer = NULL; +static void *bs_verify_buffer = NULL; +static long bs_pagesize; + +static void +create_lwp_waiter(struct lwp_data *d) +{ + ATF_REQUIRE(_lwp_create(&d->context, 0, &d->lwpid) == 0); +} + +static void +exit_lwp_waiter(void) +{ + _lwp_exit(); +} + +static void +reap_lwp_waiter(struct lwp_data *d) +{ + ATF_REQUIRE(_lwp_wait(d->lwpid, NULL) == 0); +} + +static void +create_proc_waiter(struct lwp_data *d) +{ + pid_t pid; + + ATF_REQUIRE((pid = fork()) != -1); + if (pid == 0) { + (*d->func)(d); + _exit(666); /* backstop */ + } else + d->child = pid; +} + +static void +exit_proc_waiter(void) +{ + _exit(0); +} + +static void +reap_proc_waiter(struct lwp_data *d) +{ + int status; + + ATF_REQUIRE(waitpid(d->child, &status, 0) == d->child); + ATF_REQUIRE(WIFEXITED(status)); + 
ATF_REQUIRE(WEXITSTATUS(status) == 0); +} + +static void +setup_lwp_context(struct lwp_data *d, void (*func)(void *)) +{ + + memset(d, 0, sizeof(*d)); + d->stack_base = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_STACK | MAP_PRIVATE, -1, 0); + ATF_REQUIRE(d->stack_base != MAP_FAILED); + _lwp_makecontext(&d->context, func, d, NULL, d->stack_base, STACK_SIZE); + d->threadid = 0; + d->func = func; +} + +static void +simple_test_waiter_lwp(void *arg) +{ + struct lwp_data *d = arg; + + d->threadid = _lwp_self(); + + atomic_inc_uint(&nlwps_running); + membar_sync(); + + if (__futex(d->futex_ptr, d->wait_op | d->op_flags, + d->block_val, NULL, NULL, 0, d->bitset) == -1) { + d->futex_error = errno; + _lwp_exit(); + } else { + d->futex_error = 0; + } + + membar_sync(); + atomic_dec_uint(&nlwps_running); + + _lwp_exit(); +} + +static bool +verify_zero_bs(void) +{ + + if (bs_verify_buffer == NULL) { + bs_verify_buffer = malloc(bs_pagesize); + ATF_REQUIRE(bs_verify_buffer != NULL); + } + + ATF_REQUIRE(pread(bs_fd, bs_verify_buffer, + bs_pagesize, 0) == bs_pagesize); + + return (memcmp(bs_verify_buffer, bs_source_buffer, bs_pagesize) == 0); +} + +static void +create_bs(int map_flags) +{ + + bs_pagesize = sysconf(_SC_PAGESIZE); + ATF_REQUIRE(bs_pagesize > 0); + + if ((map_flags & (MAP_FILE | MAP_ANON)) == MAP_FILE) { + bs_source_buffer = calloc(1, bs_pagesize); + ATF_REQUIRE(bs_source_buffer != NULL); + + bs_fd = open(bs_path, O_RDWR | O_CREAT | O_EXCL, 0644); + ATF_REQUIRE(bs_fd != -1); + + ATF_REQUIRE(pwrite(bs_fd, bs_source_buffer, + bs_pagesize, 0) == bs_pagesize); + ATF_REQUIRE(verify_zero_bs()); + } + + bs_addr = mmap(NULL, bs_pagesize, PROT_READ | PROT_WRITE, + map_flags | MAP_HASSEMAPHORE, bs_fd, 0); + ATF_REQUIRE(bs_addr != MAP_FAILED); +} + +static void +cleanup_bs(void) +{ + + if (bs_fd != -1) { + (void) close(bs_fd); + bs_fd = -1; + (void) unlink(bs_path); + } + if (bs_source_buffer != NULL) { + free(bs_source_buffer); + bs_source_buffer = NULL; + 
} + if (bs_verify_buffer != NULL) { + free(bs_verify_buffer); + bs_verify_buffer = NULL; + } + if (bs_addr != MAP_FAILED) { + munmap(bs_addr, bs_pagesize); + bs_addr = MAP_FAILED; + } +} + +static void +do_cleanup(void) +{ + int i; + + for (i = 0; i < NLWPS; i++) { + struct lwp_data *d = &lwp_data[i]; + if (d->stack_base != NULL && d->stack_base != MAP_FAILED) { + (void) munmap(d->stack_base, STACK_SIZE); + } + } + memset(lwp_data, 0, sizeof(lwp_data)); + STORE(&futex_word, 0); + STORE(&futex_word1, 0); + nlwps_running = 0; + + cleanup_bs(); +} + +/*****************************************************************************/ + +static void +wait_wake_test_waiter_lwp(void *arg) +{ + struct lwp_data *d = arg; + + d->threadid = _lwp_self(); + + STORE(d->futex_ptr, 1); + membar_sync(); + + /* This will block because *futex_ptr == 1. */ + if (__futex(d->futex_ptr, FUTEX_WAIT | d->op_flags, + 1, NULL, NULL, 0, 0) == -1) { + STORE(d->error_ptr, errno); + (*d->exit_func)(); + } else { + STORE(d->error_ptr, 0); + } + + do { + membar_sync(); + sleep(1); + } while (LOAD(d->futex_ptr) != 0); + + STORE(d->futex_ptr, 2); + membar_sync(); + + do { + membar_sync(); + sleep(1); + } while (LOAD(d->futex_ptr) != 3); + + /* This will not block because futex_word != 666. */ + if (__futex(d->futex_ptr, FUTEX_WAIT | d->op_flags, + 666, NULL, NULL, 0, 0) == -1) { + /* This SHOULD be EAGAIN. 
*/ + STORE(d->error_ptr, errno); + } + + STORE(d->futex_ptr, 4); + membar_sync(); + + (*d->exit_func)(); +} + +static void +do_futex_wait_wake_test(volatile int *futex_ptr, volatile int *error_ptr, + void (*create_func)(struct lwp_data *), + void (*exit_func)(void), + void (*reap_func)(struct lwp_data *), + int flags) +{ + struct lwp_data *wlwp = &lwp_data[WAITER_LWP0]; + int tries; + + if (error_ptr == NULL) + error_ptr = &wlwp->futex_error; + + if (create_func == NULL) + create_func = create_lwp_waiter; + if (exit_func == NULL) + exit_func = exit_lwp_waiter; + if (reap_func == NULL) + reap_func = reap_lwp_waiter; + + setup_lwp_context(wlwp, wait_wake_test_waiter_lwp); + + DPRINTF(("futex_basic_wait_wake: testing with flags 0x%x\n", flags)); + wlwp->op_flags = flags; + wlwp->error_ptr = error_ptr; + STORE(error_ptr, -1); + wlwp->futex_ptr = futex_ptr; + STORE(futex_ptr, 0); + wlwp->exit_func = exit_func; + membar_sync(); + + DPRINTF(("futex_basic_wait_wake: creating watier LWP\n")); + (*create_func)(wlwp); + + DPRINTF(("futex_basic_wait_wake: waiting for LWP %d to enter futex\n", + wlwp->lwpid)); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (LOAD(futex_ptr) == 1) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE(LOAD(futex_ptr) == 1); + + /* + * If the LWP is blocked in the futex, it will not have yet + * modified *error_ptr. + */ + DPRINTF(("futex_basic_wait_wake: checking for successful wait (%d)\n", + LOAD(error_ptr))); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (LOAD(error_ptr) == -1) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE(LOAD(error_ptr) == -1); + + /* Make sure invalid #wakes in rejected. 
*/ + ATF_REQUIRE_ERRNO(EINVAL, + __futex(futex_ptr, FUTEX_WAKE | flags, + -1, NULL, NULL, 0, 0) == -1); + + DPRINTF(("futex_basic_wait_wake: waking 1 waiter\n")); + ATF_REQUIRE(__futex(futex_ptr, FUTEX_WAKE | flags, + 1, NULL, NULL, 0, 0) == 1); + + DPRINTF(("futex_basic_wait_wake: checking for successful wake (%d)\n", + LOAD(error_ptr))); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (LOAD(error_ptr) == 0) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE(LOAD(error_ptr) == 0); + + STORE(futex_ptr, 0); + membar_sync(); + + DPRINTF(("futex_basic_wait_wake: waiting for LWP to advance (2)\n")); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (LOAD(futex_ptr) == 2) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE(LOAD(futex_ptr) == 2); + + STORE(futex_ptr, 3); + membar_sync(); + + DPRINTF(("futex_basic_wait_wake: waiting for LWP to advance (4)\n")); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (LOAD(futex_ptr) == 4) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE(LOAD(futex_ptr) == 4); + + DPRINTF(("futex_basic_wait_wake: checking for expected EGAIN\n")); + ATF_REQUIRE(LOAD(error_ptr) == EAGAIN); + + DPRINTF(("futex_basic_wait_wake: reaping LWP %d\n", wlwp->lwpid)); + (*reap_func)(wlwp); +} + +ATF_TC_WITH_CLEANUP(futex_basic_wait_wake_private); +ATF_TC_HEAD(futex_basic_wait_wake_private, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests basic futex WAIT + WAKE operations (PRIVATE)"); +} +ATF_TC_BODY(futex_basic_wait_wake_private, tc) +{ + do_futex_wait_wake_test(&futex_word, NULL, + NULL, NULL, NULL, + FUTEX_PRIVATE_FLAG); +} +ATF_TC_CLEANUP(futex_basic_wait_wake_private, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_basic_wait_wake_shared); +ATF_TC_HEAD(futex_basic_wait_wake_shared, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests basic futex WAIT + WAKE operations (SHARED)"); +} +ATF_TC_BODY(futex_basic_wait_wake_shared, tc) +{ + do_futex_wait_wake_test(&futex_word, NULL, + NULL, NULL, NULL, 
+ 0); +} +ATF_TC_CLEANUP(futex_basic_wait_wake_shared, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_anon_bs_private); +ATF_TC_HEAD(futex_wait_wake_anon_bs_private, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT + WAKE operations (MAP_ANON + PRIVATE)"); +} +ATF_TC_BODY(futex_wait_wake_anon_bs_private, tc) +{ + create_bs(MAP_ANON | MAP_PRIVATE); + do_futex_wait_wake_test(&bs_addr[0], NULL, + NULL, NULL, NULL, + FUTEX_PRIVATE_FLAG); +} +ATF_TC_CLEANUP(futex_wait_wake_anon_bs_private, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_anon_bs_shared); +ATF_TC_HEAD(futex_wait_wake_anon_bs_shared, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT + WAKE operations (MAP_ANON + SHARED)"); +} +ATF_TC_BODY(futex_wait_wake_anon_bs_shared, tc) +{ + create_bs(MAP_ANON | MAP_PRIVATE); + do_futex_wait_wake_test(&bs_addr[0], NULL, + NULL, NULL, NULL, + 0); +} +ATF_TC_CLEANUP(futex_wait_wake_anon_bs_shared, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_file_bs_private); +ATF_TC_HEAD(futex_wait_wake_file_bs_private, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT + WAKE operations (MAP_FILE + PRIVATE)"); +} +ATF_TC_BODY(futex_wait_wake_file_bs_private, tc) +{ + /* + * This combination (non-COW mapped file + PRIVATE futex) + * doesn't really make sense, but we should make sure it + * works as expected. + */ + create_bs(MAP_FILE | MAP_SHARED); + do_futex_wait_wake_test(&bs_addr[0], NULL, + NULL, NULL, NULL, + FUTEX_PRIVATE_FLAG); + ATF_REQUIRE(! 
verify_zero_bs()); +} +ATF_TC_CLEANUP(futex_wait_wake_file_bs_private, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_file_bs_cow_private); +ATF_TC_HEAD(futex_wait_wake_file_bs_cow_private, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT + WAKE operations (MAP_FILE COW + PRIVATE)"); +} +ATF_TC_BODY(futex_wait_wake_file_bs_cow_private, tc) +{ + create_bs(MAP_FILE | MAP_PRIVATE); + do_futex_wait_wake_test(&bs_addr[0], NULL, + NULL, NULL, NULL, + FUTEX_PRIVATE_FLAG); + ATF_REQUIRE(verify_zero_bs()); +} +ATF_TC_CLEANUP(futex_wait_wake_file_bs_cow_private, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_file_bs_shared); +ATF_TC_HEAD(futex_wait_wake_file_bs_shared, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT + WAKE operations (MAP_FILE + SHARED)"); +} +ATF_TC_BODY(futex_wait_wake_file_bs_shared, tc) +{ + create_bs(MAP_FILE | MAP_SHARED); + do_futex_wait_wake_test(&bs_addr[0], NULL, + NULL, NULL, NULL, + 0); + ATF_REQUIRE(! verify_zero_bs()); +} +ATF_TC_CLEANUP(futex_wait_wake_file_bs_shared, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_file_bs_cow_shared); +ATF_TC_HEAD(futex_wait_wake_file_bs_cow_shared, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT + WAKE operations (MAP_FILE COW + SHARED)"); +} +ATF_TC_BODY(futex_wait_wake_file_bs_cow_shared, tc) +{ + /* + * This combination (COW mapped file + SHARED futex) + * doesn't really make sense, but we should make sure it + * works as expected. 
+ */ + create_bs(MAP_FILE | MAP_PRIVATE); + do_futex_wait_wake_test(&bs_addr[0], NULL, + NULL, NULL, NULL, + 0); + ATF_REQUIRE(verify_zero_bs()); +} +ATF_TC_CLEANUP(futex_wait_wake_file_bs_cow_shared, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_anon_bs_shared_proc); +ATF_TC_HEAD(futex_wait_wake_anon_bs_shared_proc, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests multiproc futex WAIT + WAKE operations (MAP_ANON + SHARED)"); +} +ATF_TC_BODY(futex_wait_wake_anon_bs_shared_proc, tc) +{ + /* The waiter is a forked child, so the mapping must be MAP_SHARED. */ + create_bs(MAP_ANON | MAP_SHARED); + do_futex_wait_wake_test(&bs_addr[0], &bs_addr[1], + create_proc_waiter, + exit_proc_waiter, + reap_proc_waiter, + 0); +} +ATF_TC_CLEANUP(futex_wait_wake_anon_bs_shared_proc, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_file_bs_shared_proc); +ATF_TC_HEAD(futex_wait_wake_file_bs_shared_proc, tc) +{ + /* This case maps a file (MAP_FILE), not anonymous memory. */ + atf_tc_set_md_var(tc, "descr", + "tests multiproc futex WAIT + WAKE operations (MAP_FILE + SHARED)"); +} +ATF_TC_BODY(futex_wait_wake_file_bs_shared_proc, tc) +{ + create_bs(MAP_FILE | MAP_SHARED); + do_futex_wait_wake_test(&bs_addr[0], &bs_addr[1], + create_proc_waiter, + exit_proc_waiter, + reap_proc_waiter, + 0); +} +ATF_TC_CLEANUP(futex_wait_wake_file_bs_shared_proc, tc) +{ + do_cleanup(); +} + +/*****************************************************************************/ + +ATF_TC(futex_wait_pointless_bitset); +ATF_TC_HEAD(futex_wait_pointless_bitset, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT_BITSET with no bits set (PRIVATE)"); +} +ATF_TC_BODY(futex_wait_pointless_bitset, tc) +{ + + futex_word = 1; + /* This won't block because no bits are set.
*/ + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG, + 1, NULL, NULL, 0, 0) == 0); +} +/* Two LWPs wait with distinct bitsets; wakes must honour the wake mask. */ +static void +do_futex_wait_wake_bitset_test(int flags) +{ + struct lwp_data *wlwp0 = &lwp_data[WAITER_LWP0]; + struct lwp_data *wlwp1 = &lwp_data[WAITER_LWP1]; + int i, tries; + + for (i = WAITER_LWP0; i <= WAITER_LWP1; i++) { + setup_lwp_context(&lwp_data[i], simple_test_waiter_lwp); + lwp_data[i].op_flags = flags; + lwp_data[i].futex_error = -1; + lwp_data[i].bitset = __BIT(i); + lwp_data[i].wait_op = FUTEX_WAIT_BITSET; + lwp_data[i].futex_ptr = &futex_word; + lwp_data[i].block_val = 1; + } + + STORE(&futex_word, 1); + membar_sync(); + + ATF_REQUIRE(_lwp_create(&wlwp0->context, 0, &wlwp0->lwpid) == 0); + ATF_REQUIRE(_lwp_create(&wlwp1->context, 0, &wlwp1->lwpid) == 0); + + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 2) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE_EQ_MSG(nlwps_running, 2, "waiters failed to start"); + + /* Ensure they're blocked. */ + ATF_REQUIRE(wlwp0->futex_error == -1); + ATF_REQUIRE(wlwp1->futex_error == -1); + + /* Make sure an invalid #wakes is rejected. */ + ATF_REQUIRE_ERRNO(EINVAL, + __futex(&futex_word, FUTEX_WAKE_BITSET | flags, + -1, NULL, NULL, 0, 0) == -1); + + /* This should result in no wakeups because no bits are set. */ + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_BITSET | flags, + INT_MAX, NULL, NULL, 0, 0) == 0); + + /* This should result in no wakeups because the wrong bits are set. */ + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_BITSET | flags, + INT_MAX, NULL, NULL, 0, + ~(wlwp0->bitset | wlwp1->bitset)) == 0); + + /* Trust, but verify. */ + sleep(1); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 2) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE_EQ_MSG(nlwps_running, 2, "waiters exited unexpectedly"); + + /* Wake up the first LWP.
*/ + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_BITSET | flags, + INT_MAX, NULL, NULL, 0, + wlwp0->bitset) == 1); + sleep(1); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 1) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE(nlwps_running == 1); + ATF_REQUIRE(wlwp0->futex_error == 0); + ATF_REQUIRE(_lwp_wait(wlwp0->lwpid, NULL) == 0); + + /* Wake up the second LWP. */ + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_BITSET | flags, + INT_MAX, NULL, NULL, 0, + wlwp1->bitset) == 1); + sleep(1); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 0) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE(nlwps_running == 0); + ATF_REQUIRE(wlwp1->futex_error == 0); + ATF_REQUIRE(_lwp_wait(wlwp1->lwpid, NULL) == 0); +} + +ATF_TC_WITH_CLEANUP(futex_wait_wake_bitset); +ATF_TC_HEAD(futex_wait_wake_bitset, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT_BITSET + WAKE_BITSET operations"); +} +ATF_TC_BODY(futex_wait_wake_bitset, tc) +{ + do_futex_wait_wake_bitset_test(FUTEX_PRIVATE_FLAG); +} +ATF_TC_CLEANUP(futex_wait_wake_bitset, tc) +{ + do_cleanup(); +} + +/*****************************************************************************/ + +static void +do_futex_requeue_test(int flags, int op) +{ + struct lwp_data *wlwp0 = &lwp_data[WAITER_LWP0]; + struct lwp_data *wlwp1 = &lwp_data[WAITER_LWP1]; + struct lwp_data *wlwp2 = &lwp_data[WAITER_LWP2]; + struct lwp_data *wlwp3 = &lwp_data[WAITER_LWP3]; + const int good_val3 = (op == FUTEX_CMP_REQUEUE) ? 1 : 0; + const int bad_val3 = (op == FUTEX_CMP_REQUEUE) ? 
666 : 0; + int i, tries; + + for (i = WAITER_LWP0; i <= WAITER_LWP3; i++) { + setup_lwp_context(&lwp_data[i], simple_test_waiter_lwp); + lwp_data[i].op_flags = flags; + lwp_data[i].futex_error = -1; + lwp_data[i].futex_ptr = &futex_word; + lwp_data[i].block_val = 1; + lwp_data[i].bitset = 0; + lwp_data[i].wait_op = FUTEX_WAIT; + } + + STORE(&futex_word, 1); + STORE(&futex_word1, 1); + membar_sync(); + + ATF_REQUIRE(_lwp_create(&wlwp0->context, 0, &wlwp0->lwpid) == 0); + ATF_REQUIRE(_lwp_create(&wlwp1->context, 0, &wlwp1->lwpid) == 0); + ATF_REQUIRE(_lwp_create(&wlwp2->context, 0, &wlwp2->lwpid) == 0); + ATF_REQUIRE(_lwp_create(&wlwp3->context, 0, &wlwp3->lwpid) == 0); + + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 4) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE_EQ_MSG(nlwps_running, 4, "waiters failed to start"); + + /* Ensure they're blocked. */ + ATF_REQUIRE(wlwp0->futex_error == -1); + ATF_REQUIRE(wlwp1->futex_error == -1); + ATF_REQUIRE(wlwp2->futex_error == -1); + ATF_REQUIRE(wlwp3->futex_error == -1); + + /* Make sure invalid #wakes and #requeues are rejected. */ + ATF_REQUIRE_ERRNO(EINVAL, + __futex(&futex_word, op | flags, + -1, NULL, &futex_word1, INT_MAX, bad_val3) == -1); + + ATF_REQUIRE_ERRNO(EINVAL, + __futex(&futex_word, op | flags, + 0, NULL, &futex_word1, -1, bad_val3) == -1); + + /* + * FUTEX 0: 4 LWPs + * FUTEX 1: 0 LWPs + */ + + if (op == FUTEX_CMP_REQUEUE) { + /* This should fail because the futex_word value is 1. */ + ATF_REQUIRE_ERRNO(EAGAIN, + __futex(&futex_word, op | flags, + 0, NULL, &futex_word1, INT_MAX, bad_val3) == -1); + } + + /* + * FUTEX 0: 4 LWPs + * FUTEX 1: 0 LWPs + */ + + /* Move all waiters from 0 to 1. */ + ATF_REQUIRE(__futex(&futex_word, op | flags, + 0, NULL, &futex_word1, INT_MAX, good_val3) == 0); + + /* + * FUTEX 0: 0 LWPs + * FUTEX 1: 4 LWPs + */ + + if (op == FUTEX_CMP_REQUEUE) { + /* This should fail because the futex_word1 value is 1. 
*/ + ATF_REQUIRE_ERRNO(EAGAIN, + __futex(&futex_word1, op | flags, + 1, NULL, &futex_word, 1, bad_val3) == -1); + } + + /* + * FUTEX 0: 0 LWPs + * FUTEX 1: 4 LWPs + */ + + /* Wake one waiter on 1, move one waiter to 0. */ + ATF_REQUIRE(__futex(&futex_word1, op | flags, + 1, NULL, &futex_word, 1, good_val3) == 1); + + /* + * FUTEX 0: 1 LWP + * FUTEX 1: 2 LWPs + */ + + /* Wake all waiters on 0 (should be 1). */ + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE | flags, + INT_MAX, NULL, NULL, 0, 0) == 1); + + /* Wake all waiters on 1 (should be 2). */ + ATF_REQUIRE(__futex(&futex_word1, FUTEX_WAKE | flags, + INT_MAX, NULL, NULL, 0, 0) == 2); + + /* Trust, but verify. */ + sleep(1); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 0) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE_EQ_MSG(nlwps_running, 0, "waiters failed to exit"); + + ATF_REQUIRE(_lwp_wait(wlwp0->lwpid, NULL) == 0); + ATF_REQUIRE(_lwp_wait(wlwp1->lwpid, NULL) == 0); + ATF_REQUIRE(_lwp_wait(wlwp2->lwpid, NULL) == 0); + ATF_REQUIRE(_lwp_wait(wlwp3->lwpid, NULL) == 0); +} + +ATF_TC_WITH_CLEANUP(futex_requeue); +ATF_TC_HEAD(futex_requeue, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex REQUEUE operations"); +} +ATF_TC_BODY(futex_requeue, tc) +{ + do_futex_requeue_test(FUTEX_PRIVATE_FLAG, FUTEX_REQUEUE); +} +ATF_TC_CLEANUP(futex_requeue, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_cmp_requeue); +ATF_TC_HEAD(futex_cmp_requeue, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex CMP_REQUEUE operations"); +} +ATF_TC_BODY(futex_cmp_requeue, tc) +{ + do_futex_requeue_test(FUTEX_PRIVATE_FLAG, FUTEX_CMP_REQUEUE); +} +ATF_TC_CLEANUP(futex_cmp_requeue, tc) +{ + do_cleanup(); +} + +/*****************************************************************************/ + +static void +do_futex_wake_op_op_test(int flags) +{ + int op; + + futex_word = 0; + futex_word1 = 0; + + /* + * The op= operations should work even if there are no waiters. 
+ */ + + /* + * Because these operations use both futex addresses, exercise + * rejecting unaligned futex addresses here. + */ + op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE_ERRNO(EINVAL, + __futex((int *)1, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == -1); + ATF_REQUIRE(futex_word1 == 0); + + ATF_REQUIRE_ERRNO(EINVAL, + __futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, (int *)1, 0, op) == -1); + ATF_REQUIRE(futex_word == 0); + + /* Check unmapped uaddr2 handling, too. */ + ATF_REQUIRE_ERRNO(EFAULT, + __futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, NULL, 0, op) == -1); + ATF_REQUIRE(futex_word == 0); + + op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == 0); + ATF_REQUIRE(futex_word1 == 1); + + op = FUTEX_OP(FUTEX_OP_ADD, 1, FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == 0); + ATF_REQUIRE(futex_word1 == 2); + + op = FUTEX_OP(FUTEX_OP_OR, 2, FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == 0); + ATF_REQUIRE(futex_word1 == 2); + + /* This should fail because of invalid shift value 32. 
*/ + op = FUTEX_OP(FUTEX_OP_OR | FUTEX_OP_OPARG_SHIFT, 32, + FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE_ERRNO(EINVAL, + __futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == -1); + ATF_REQUIRE(futex_word1 == 2); + + op = FUTEX_OP(FUTEX_OP_OR | FUTEX_OP_OPARG_SHIFT, 31, + FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == 0); + ATF_REQUIRE(futex_word1 == (int)0x80000002); + + op = FUTEX_OP(FUTEX_OP_ANDN | FUTEX_OP_OPARG_SHIFT, 31, + FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == 0); + ATF_REQUIRE(futex_word1 == 2); + + op = FUTEX_OP(FUTEX_OP_XOR, 2, FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 0, op) == 0); + ATF_REQUIRE(futex_word1 == 0); +} + +ATF_TC_WITH_CLEANUP(futex_wake_op_op); +ATF_TC_HEAD(futex_wake_op_op, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAKE_OP OP operations"); +} +ATF_TC_BODY(futex_wake_op_op, tc) +{ + do_futex_wake_op_op_test(FUTEX_PRIVATE_FLAG); +} +ATF_TC_CLEANUP(futex_wake_op_op, tc) +{ + do_cleanup(); +} + +static void +create_wake_op_test_lwps(int flags) +{ + int i; + + futex_word1 = 0; + membar_sync(); + + for (i = WAITER_LWP0; i <= WAITER_LWP5; i++) { + setup_lwp_context(&lwp_data[i], simple_test_waiter_lwp); + lwp_data[i].op_flags = flags; + lwp_data[i].futex_error = -1; + lwp_data[i].futex_ptr = &futex_word1; + lwp_data[i].block_val = 0; + lwp_data[i].bitset = 0; + lwp_data[i].wait_op = FUTEX_WAIT; + ATF_REQUIRE(_lwp_create(&lwp_data[i].context, 0, + &lwp_data[i].lwpid) == 0); + } + + for (i = 0; i < 5; i++) { + membar_sync(); + if (nlwps_running == 6) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE_EQ_MSG(nlwps_running, 6, "waiters failed to start"); + + /* Ensure they're blocked. 
*/ + for (i = WAITER_LWP0; i <= WAITER_LWP5; i++) { + ATF_REQUIRE(lwp_data[i].futex_error == -1); + } +} +/* Reap every waiter LWP from WAITER_LWP0 through WAITER_LWP5. */ +static void +reap_wake_op_test_lwps(void) +{ + int i; + + for (i = WAITER_LWP0; i <= WAITER_LWP5; i++) { + ATF_REQUIRE(_lwp_wait(lwp_data[i].lwpid, NULL) == 0); + } +} +/* Exercise each FUTEX_WAKE_OP comparison against waiters on futex_word1. */ +static void +do_futex_wake_op_cmp_test(int flags) +{ + int tries, op; + + futex_word = 0; + membar_sync(); + + /* + * Verify a negative and a positive case for each individual + * compare (expected wake counts 0, then 1). + */ + + create_wake_op_test_lwps(flags); + + /* #LWPs = 6 */ + op = FUTEX_OP(FUTEX_OP_SET, 0, FUTEX_OP_CMP_EQ, 1); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 0); + ATF_REQUIRE(futex_word1 == 0); + + op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_EQ, 0); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 1); + ATF_REQUIRE(futex_word1 == 1); + + /* #LWPs = 5 */ + op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_NE, 1); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 0); + ATF_REQUIRE(futex_word1 == 1); + + op = FUTEX_OP(FUTEX_OP_SET, 2, FUTEX_OP_CMP_NE, 2); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 1); + ATF_REQUIRE(futex_word1 == 2); + + /* #LWPs = 4 */ + op = FUTEX_OP(FUTEX_OP_SET, 2, FUTEX_OP_CMP_LT, 2); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 0); + ATF_REQUIRE(futex_word1 == 2); + + op = FUTEX_OP(FUTEX_OP_SET, 2, FUTEX_OP_CMP_LT, 3); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 1); + ATF_REQUIRE(futex_word1 == 2); + + /* #LWPs = 3 */ + op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_LE, 1); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 0); + ATF_REQUIRE(futex_word1 == 1); + + op = FUTEX_OP(FUTEX_OP_SET, 1, FUTEX_OP_CMP_LE, 1); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0,
NULL, &futex_word1, 1, op) == 1); + ATF_REQUIRE(futex_word1 == 1); + + /* #LWPs = 2 */ + op = FUTEX_OP(FUTEX_OP_SET, 3, FUTEX_OP_CMP_GT, 3); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 0); + ATF_REQUIRE(futex_word1 == 3); + + op = FUTEX_OP(FUTEX_OP_SET, 2, FUTEX_OP_CMP_GT, 2); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 1); + ATF_REQUIRE(futex_word1 == 2); + + /* #LWPs = 1 */ + op = FUTEX_OP(FUTEX_OP_SET, 3, FUTEX_OP_CMP_GE, 4); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 0); + ATF_REQUIRE(futex_word1 == 3); + + op = FUTEX_OP(FUTEX_OP_SET, 2, FUTEX_OP_CMP_GE, 3); + ATF_REQUIRE(__futex(&futex_word, FUTEX_WAKE_OP | flags, + 0, NULL, &futex_word1, 1, op) == 1); + ATF_REQUIRE(futex_word1 == 2); + + /* #LWPs = 0 */ + + /* Trust, but verify. */ + sleep(1); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 0) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE_EQ_MSG(nlwps_running, 0, "waiters failed to exit"); + + reap_wake_op_test_lwps(); + + /* + * Verify wakes on uaddr work even if the uaddr2 comparison + * fails. + */ + + create_wake_op_test_lwps(flags); + + /* #LWPs = 6 */ + ATF_REQUIRE(futex_word == 0); + op = FUTEX_OP(FUTEX_OP_SET, 0, FUTEX_OP_CMP_EQ, 666); + ATF_REQUIRE(__futex(&futex_word1, FUTEX_WAKE_OP | flags, + INT_MAX, NULL, &futex_word, 0, op) == 6); + ATF_REQUIRE(futex_word == 0); + + /* #LWPs = 0 */ + + /* Trust, but verify. 
*/ + sleep(1); + for (tries = 0; tries < 5; tries++) { + membar_sync(); + if (nlwps_running == 0) + break; + sleep(1); + } + membar_sync(); + ATF_REQUIRE_EQ_MSG(nlwps_running, 0, "waiters failed to exit"); + + reap_wake_op_test_lwps(); +} + +ATF_TC_WITH_CLEANUP(futex_wake_op_cmp); +ATF_TC_HEAD(futex_wake_op_cmp, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAKE_OP CMP operations"); +} +ATF_TC_BODY(futex_wake_op_cmp, tc) +{ + do_futex_wake_op_cmp_test(FUTEX_PRIVATE_FLAG); +} +ATF_TC_CLEANUP(futex_wake_op_cmp, tc) +{ + do_cleanup(); +} + +/*****************************************************************************/ + +static void +do_futex_wait_timeout(bool relative, clockid_t clock) +{ + struct timespec ts; + struct timespec deadline; + int op = relative ? FUTEX_WAIT : FUTEX_WAIT_BITSET; + + if (clock == CLOCK_REALTIME) + op |= FUTEX_CLOCK_REALTIME; + + ATF_REQUIRE(clock_gettime(clock, &deadline) == 0); + deadline.tv_sec += 2; + if (relative) { + ts.tv_sec = 2; + ts.tv_nsec = 0; + } else { + ts = deadline; + } + + futex_word = 1; + ATF_REQUIRE_ERRNO(ETIMEDOUT, + __futex(&futex_word, op | FUTEX_PRIVATE_FLAG, + 1, &ts, NULL, 0, FUTEX_BITSET_MATCH_ANY) == -1); + + /* Can't reliably check CLOCK_REALTIME in the presence of NTP. 
*/ + if (clock != CLOCK_REALTIME) { + ATF_REQUIRE(clock_gettime(clock, &ts) == 0); + ATF_REQUIRE(ts.tv_sec >= deadline.tv_sec); + ATF_REQUIRE(ts.tv_sec > deadline.tv_sec || + ts.tv_nsec >= deadline.tv_nsec); + } +} + +ATF_TC(futex_wait_timeout_relative); +ATF_TC_HEAD(futex_wait_timeout_relative, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT with relative timeout"); +} +ATF_TC_BODY(futex_wait_timeout_relative, tc) +{ + do_futex_wait_timeout(true, CLOCK_MONOTONIC); +} + +ATF_TC(futex_wait_timeout_relative_rt); +ATF_TC_HEAD(futex_wait_timeout_relative_rt, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT with relative timeout (REALTIME)"); +} +ATF_TC_BODY(futex_wait_timeout_relative_rt, tc) +{ + do_futex_wait_timeout(true, CLOCK_REALTIME); +} + +ATF_TC(futex_wait_timeout_deadline); +ATF_TC_HEAD(futex_wait_timeout_deadline, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT with absolute deadline"); +} +ATF_TC_BODY(futex_wait_timeout_deadline, tc) +{ + do_futex_wait_timeout(false, CLOCK_MONOTONIC); +} + +ATF_TC(futex_wait_timeout_deadline_rt); +ATF_TC_HEAD(futex_wait_timeout_deadline_rt, tc) +{ + atf_tc_set_md_var(tc, "descr", + "tests futex WAIT with absolute deadline (REALTIME)"); +} +ATF_TC_BODY(futex_wait_timeout_deadline_rt, tc) +{ + do_futex_wait_timeout(false, CLOCK_REALTIME); +} + +/*****************************************************************************/ + +ATF_TP_ADD_TCS(tp) +{ + ATF_TP_ADD_TC(tp, futex_basic_wait_wake_private); + ATF_TP_ADD_TC(tp, futex_basic_wait_wake_shared); + ATF_TP_ADD_TC(tp, futex_wait_wake_anon_bs_private); + ATF_TP_ADD_TC(tp, futex_wait_wake_anon_bs_shared); + ATF_TP_ADD_TC(tp, futex_wait_wake_file_bs_private); + ATF_TP_ADD_TC(tp, futex_wait_wake_file_bs_shared); + ATF_TP_ADD_TC(tp, futex_wait_wake_file_bs_cow_private); + ATF_TP_ADD_TC(tp, futex_wait_wake_file_bs_cow_shared); + + ATF_TP_ADD_TC(tp, futex_wait_wake_anon_bs_shared_proc); + ATF_TP_ADD_TC(tp, futex_wait_wake_file_bs_shared_proc); + + 
ATF_TP_ADD_TC(tp, futex_wait_pointless_bitset); + ATF_TP_ADD_TC(tp, futex_wait_wake_bitset); + + ATF_TP_ADD_TC(tp, futex_wait_timeout_relative); + ATF_TP_ADD_TC(tp, futex_wait_timeout_relative_rt); + ATF_TP_ADD_TC(tp, futex_wait_timeout_deadline); + ATF_TP_ADD_TC(tp, futex_wait_timeout_deadline_rt); + + ATF_TP_ADD_TC(tp, futex_requeue); + ATF_TP_ADD_TC(tp, futex_cmp_requeue); + + ATF_TP_ADD_TC(tp, futex_wake_op_op); + ATF_TP_ADD_TC(tp, futex_wake_op_cmp); + + return atf_no_error(); +} diff --git a/tests/lib/libc/sys/t_futex_robust.c b/tests/lib/libc/sys/t_futex_robust.c new file mode 100644 index 000000000000..a765e4accd65 --- /dev/null +++ b/tests/lib/libc/sys/t_futex_robust.c @@ -0,0 +1,408 @@ +/* $NetBSD: t_futex_robust.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $ */ + +/*- + * Copyright (c) 2019 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__COPYRIGHT("@(#) Copyright (c) 2019\ + The NetBSD Foundation, inc. All rights reserved."); +__RCSID("$NetBSD: t_futex_robust.c,v 1.1 2020/04/26 18:53:33 thorpej Exp $"); + +#include +#include +#include +#include +#include + +#include + +#include + +#define STACK_SIZE 65536 +#define NLOCKS 16 + +struct futex_lock_pos { + struct futex_robust_list list; + int fword; +}; +struct futex_lock_pos pos_locks[NLOCKS]; + +struct futex_lock_neg { + int fword; + struct futex_robust_list list; +}; +struct futex_lock_neg neg_locks[NLOCKS]; + +struct lwp_data { + ucontext_t context; + void *stack_base; + lwpid_t lwpid; + lwpid_t threadid; + struct futex_robust_list_head rhead; + + /* Results to be asserted by main thread. 
*/ + bool set_robust_list_failed; +}; + +struct lwp_data lwp_data; + +static void +setup_lwp_context(void (*func)(void *)) +{ + + memset(&lwp_data, 0, sizeof(lwp_data)); + lwp_data.stack_base = mmap(NULL, STACK_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANON | MAP_STACK | MAP_PRIVATE, -1, 0); + ATF_REQUIRE(lwp_data.stack_base != MAP_FAILED); + _lwp_makecontext(&lwp_data.context, func, + &lwp_data, NULL, lwp_data.stack_base, STACK_SIZE); + lwp_data.threadid = 0; +} + +static void +do_cleanup(void) +{ + if (lwp_data.stack_base != NULL && + lwp_data.stack_base != MAP_FAILED) { + (void) munmap(lwp_data.stack_base, STACK_SIZE); + } + memset(&lwp_data, 0, sizeof(lwp_data)); + memset(pos_locks, 0, sizeof(pos_locks)); + memset(neg_locks, 0, sizeof(neg_locks)); +} + +static void +test_pos_robust_list(void *arg) +{ + struct lwp_data *d = arg; + int i; + + d->rhead.list.next = &d->rhead.list; + d->rhead.futex_offset = offsetof(struct futex_lock_pos, fword) - + offsetof(struct futex_lock_pos, list); + d->rhead.pending_list = NULL; + + if (__futex_set_robust_list(&d->rhead, sizeof(d->rhead)) != 0) { + d->set_robust_list_failed = true; + _lwp_exit(); + } + + memset(pos_locks, 0, sizeof(pos_locks)); + + d->threadid = _lwp_self(); + + for (i = 0; i < NLOCKS-1; i++) { + pos_locks[i].fword = _lwp_self(); + pos_locks[i].list.next = d->rhead.list.next; + d->rhead.list.next = &pos_locks[i].list; + } + + pos_locks[i].fword = _lwp_self(); + d->rhead.pending_list = &pos_locks[i].list; + + _lwp_exit(); +} + +static void +test_neg_robust_list(void *arg) +{ + struct lwp_data *d = arg; + int i; + + d->rhead.list.next = &d->rhead.list; + d->rhead.futex_offset = offsetof(struct futex_lock_neg, fword) - + offsetof(struct futex_lock_neg, list); + d->rhead.pending_list = NULL; + + if (__futex_set_robust_list(&d->rhead, sizeof(d->rhead)) != 0) { + d->set_robust_list_failed = true; + _lwp_exit(); + } + + memset(neg_locks, 0, sizeof(neg_locks)); + + d->threadid = _lwp_self(); + + for (i = 0; i < NLOCKS-1; 
i++) { + neg_locks[i].fword = _lwp_self(); + neg_locks[i].list.next = d->rhead.list.next; + d->rhead.list.next = &neg_locks[i].list; + } + + neg_locks[i].fword = _lwp_self(); + d->rhead.pending_list = &neg_locks[i].list; + + _lwp_exit(); +} + +static void +test_unmapped_robust_list(void *arg) +{ + struct lwp_data *d = arg; + + d->rhead.list.next = &d->rhead.list; + d->rhead.futex_offset = offsetof(struct futex_lock_pos, fword) - + offsetof(struct futex_lock_pos, list); + d->rhead.pending_list = NULL; + + if (__futex_set_robust_list((void *)sizeof(d->rhead), + sizeof(d->rhead)) != 0) { + d->set_robust_list_failed = true; + _lwp_exit(); + } + + memset(pos_locks, 0, sizeof(pos_locks)); + + d->threadid = _lwp_self(); + + _lwp_exit(); +} + +static void +test_evil_circular_robust_list(void *arg) +{ + struct lwp_data *d = arg; + int i; + + d->rhead.list.next = &d->rhead.list; + d->rhead.futex_offset = offsetof(struct futex_lock_pos, fword) - + offsetof(struct futex_lock_pos, list); + d->rhead.pending_list = NULL; + + if (__futex_set_robust_list(&d->rhead, sizeof(d->rhead)) != 0) { + d->set_robust_list_failed = true; + _lwp_exit(); + } + + memset(pos_locks, 0, sizeof(pos_locks)); + + d->threadid = _lwp_self(); + + for (i = 0; i < NLOCKS; i++) { + pos_locks[i].fword = _lwp_self(); + pos_locks[i].list.next = d->rhead.list.next; + d->rhead.list.next = &pos_locks[i].list; + } + + /* Make a loop. 
*/ + pos_locks[0].list.next = pos_locks[NLOCKS-1].list.next; + + _lwp_exit(); +} + +static void +test_bad_pending_robust_list(void *arg) +{ + struct lwp_data *d = arg; + int i; + + d->rhead.list.next = &d->rhead.list; + d->rhead.futex_offset = offsetof(struct futex_lock_pos, fword) - + offsetof(struct futex_lock_pos, list); + d->rhead.pending_list = NULL; + + if (__futex_set_robust_list(&d->rhead, sizeof(d->rhead)) != 0) { + d->set_robust_list_failed = true; + _lwp_exit(); + } + + memset(pos_locks, 0, sizeof(pos_locks)); + + d->threadid = _lwp_self(); + + for (i = 0; i < NLOCKS; i++) { + pos_locks[i].fword = _lwp_self(); + pos_locks[i].list.next = d->rhead.list.next; + d->rhead.list.next = &pos_locks[i].list; + } + + d->rhead.pending_list = (void *)sizeof(d->rhead); + + _lwp_exit(); +} + +ATF_TC_WITH_CLEANUP(futex_robust_positive); +ATF_TC_HEAD(futex_robust_positive, tc) +{ + atf_tc_set_md_var(tc, "descr", + "checks futex robust list with positive futex word offset"); +} + +ATF_TC_BODY(futex_robust_positive, tc) +{ + int i; + + setup_lwp_context(test_pos_robust_list); + + ATF_REQUIRE(_lwp_create(&lwp_data.context, 0, &lwp_data.lwpid) == 0); + ATF_REQUIRE(_lwp_wait(lwp_data.lwpid, NULL) == 0); + + ATF_REQUIRE(lwp_data.set_robust_list_failed == false); + + for (i = 0; i < NLOCKS; i++) { + ATF_REQUIRE((pos_locks[i].fword & FUTEX_TID_MASK) == + lwp_data.threadid); + ATF_REQUIRE((pos_locks[i].fword & FUTEX_OWNER_DIED) != 0); + } +} + +ATF_TC_CLEANUP(futex_robust_positive, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_robust_negative); +ATF_TC_HEAD(futex_robust_negative, tc) +{ + atf_tc_set_md_var(tc, "descr", + "checks futex robust list with negative futex word offset"); +} + +ATF_TC_BODY(futex_robust_negative, tc) +{ + int i; + + setup_lwp_context(test_neg_robust_list); + + ATF_REQUIRE(_lwp_create(&lwp_data.context, 0, &lwp_data.lwpid) == 0); + ATF_REQUIRE(_lwp_wait(lwp_data.lwpid, NULL) == 0); + + ATF_REQUIRE(lwp_data.set_robust_list_failed == false); + + for 
(i = 0; i < NLOCKS; i++) { + ATF_REQUIRE((neg_locks[i].fword & FUTEX_TID_MASK) == + lwp_data.threadid); + ATF_REQUIRE((neg_locks[i].fword & FUTEX_OWNER_DIED) != 0); + } +} + +ATF_TC_CLEANUP(futex_robust_negative, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_robust_unmapped); +ATF_TC_HEAD(futex_robust_unmapped, tc) +{ + atf_tc_set_md_var(tc, "descr", + "checks futex robust list with unmapped robust list pointer"); +} + +ATF_TC_BODY(futex_robust_unmapped, tc) +{ + + setup_lwp_context(test_unmapped_robust_list); + + ATF_REQUIRE(_lwp_create(&lwp_data.context, 0, &lwp_data.lwpid) == 0); + ATF_REQUIRE(_lwp_wait(lwp_data.lwpid, NULL) == 0); + + ATF_REQUIRE(lwp_data.set_robust_list_failed == false); + + /* + * No additional validation; just exercises a code path + * in the kernel. + */ +} + +ATF_TC_CLEANUP(futex_robust_unmapped, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_robust_evil_circular); +ATF_TC_HEAD(futex_robust_evil_circular, tc) +{ + atf_tc_set_md_var(tc, "descr", + "checks futex robust list processing faced with a deliberately " + "circular list"); +} + +ATF_TC_BODY(futex_robust_evil_circular, tc) +{ + int i; + + setup_lwp_context(test_evil_circular_robust_list); + + ATF_REQUIRE(_lwp_create(&lwp_data.context, 0, &lwp_data.lwpid) == 0); + ATF_REQUIRE(_lwp_wait(lwp_data.lwpid, NULL) == 0); + + ATF_REQUIRE(lwp_data.set_robust_list_failed == false); + /* All locks must carry the owner's TID and FUTEX_OWNER_DIED. */ + for (i = 0; i < NLOCKS; i++) { + ATF_REQUIRE((pos_locks[i].fword & FUTEX_TID_MASK) == + lwp_data.threadid); + ATF_REQUIRE((pos_locks[i].fword & FUTEX_OWNER_DIED) != 0); + } +} + +ATF_TC_CLEANUP(futex_robust_evil_circular, tc) +{ + do_cleanup(); +} + +ATF_TC_WITH_CLEANUP(futex_robust_bad_pending); +ATF_TC_HEAD(futex_robust_bad_pending, tc) +{ + atf_tc_set_md_var(tc, "descr", + "checks futex robust list processing with a bad pending pointer"); +} + +ATF_TC_BODY(futex_robust_bad_pending, tc) +{ + int i; + + setup_lwp_context(test_bad_pending_robust_list); + +
ATF_REQUIRE(_lwp_create(&lwp_data.context, 0, &lwp_data.lwpid) == 0); + ATF_REQUIRE(_lwp_wait(lwp_data.lwpid, NULL) == 0); + + ATF_REQUIRE(lwp_data.set_robust_list_failed == false); + + for (i = 0; i < NLOCKS; i++) { + ATF_REQUIRE((pos_locks[i].fword & FUTEX_TID_MASK) == + lwp_data.threadid); + ATF_REQUIRE((pos_locks[i].fword & FUTEX_OWNER_DIED) != 0); + } +} + +ATF_TC_CLEANUP(futex_robust_bad_pending, tc) +{ + do_cleanup(); +} + +ATF_TP_ADD_TCS(tp) +{ + /* Register every robust futex test case defined above. */ + ATF_TP_ADD_TC(tp, futex_robust_positive); + ATF_TP_ADD_TC(tp, futex_robust_negative); + ATF_TP_ADD_TC(tp, futex_robust_unmapped); + ATF_TP_ADD_TC(tp, futex_robust_evil_circular); + + ATF_TP_ADD_TC(tp, futex_robust_bad_pending); + + return atf_no_error(); +}