From 6593739f61d37865a41294d0e5da89086767cc19 Mon Sep 17 00:00:00 2001 From: manu Date: Wed, 22 Jun 2005 15:10:51 +0000 Subject: [PATCH] Implement CLONE_PARENT_SETTID, CLONE_CHILD_CLEARTID, and CLONE_CHILD_SETTID options to clone(). This makes fork() work on amd64. clone() prototype has changed and the change is probably relevant on some other arches. --- sys/compat/linux/arch/amd64/linux_machdep.c | 7 +- sys/compat/linux/arch/amd64/linux_syscall.h | 7 +- .../linux/arch/amd64/linux_syscallargs.h | 10 +- sys/compat/linux/arch/amd64/linux_syscalls.c | 6 +- sys/compat/linux/arch/amd64/linux_sysent.c | 10 +- sys/compat/linux/arch/amd64/syscalls.master | 7 +- sys/compat/linux/common/linux_emuldata.h | 7 +- sys/compat/linux/common/linux_exec.c | 94 ++++++++++++++++++- sys/compat/linux/common/linux_sched.c | 68 +++++++++++++- 9 files changed, 190 insertions(+), 26 deletions(-) diff --git a/sys/compat/linux/arch/amd64/linux_machdep.c b/sys/compat/linux/arch/amd64/linux_machdep.c index 5a219ee98807..15cf52e9fb26 100644 --- a/sys/compat/linux/arch/amd64/linux_machdep.c +++ b/sys/compat/linux/arch/amd64/linux_machdep.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux_machdep.c,v 1.6 2005/05/22 19:31:15 fvdl Exp $ */ +/* $NetBSD: linux_machdep.c,v 1.7 2005/06/22 15:10:51 manu Exp $ */ /*- * Copyright (c) 2005 Emmanuel Dreyfus, all rights reserved. 
@@ -33,7 +33,7 @@ #include -__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.6 2005/05/22 19:31:15 fvdl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_machdep.c,v 1.7 2005/06/22 15:10:51 manu Exp $"); #include #include @@ -188,7 +188,8 @@ linux_sendsig(ksi, mask) bzero(&sigframe, sizeof(sigframe)); if (ps->sa_sigdesc[sig].sd_vers != 0) - sigframe.pretcode = (char *)ps->sa_sigdesc[sig].sd_tramp; + sigframe.pretcode = + (char *)(u_long)ps->sa_sigdesc[sig].sd_tramp; else sigframe.pretcode = NULL; diff --git a/sys/compat/linux/arch/amd64/linux_syscall.h b/sys/compat/linux/arch/amd64/linux_syscall.h index 809e0c71cf82..3450cf163d02 100644 --- a/sys/compat/linux/arch/amd64/linux_syscall.h +++ b/sys/compat/linux/arch/amd64/linux_syscall.h @@ -1,4 +1,4 @@ -/* $NetBSD: linux_syscall.h,v 1.4 2005/05/22 19:29:40 fvdl Exp $ */ +/* $NetBSD: linux_syscall.h,v 1.5 2005/06/22 15:10:51 manu Exp $ */ /* * System call numbers. @@ -163,7 +163,7 @@ /* syscall: "getsockopt" ret: "int" args: "int" "int" "int" "void *" "int *" */ #define LINUX_SYS_getsockopt 55 -/* syscall: "clone" ret: "int" args: "int" "void *" */ +/* syscall: "clone" ret: "int" args: "int" "void *" "void *" "void *" */ #define LINUX_SYS_clone 56 /* syscall: "fork" ret: "int" args: */ @@ -496,6 +496,9 @@ /* syscall: "getdents64" ret: "int" args: "int" "struct linux_dirent64 *" "unsigned int" */ #define LINUX_SYS_getdents64 217 +/* syscall: "set_tid_address" ret: "int" args: "int *" */ +#define LINUX_SYS_set_tid_address 218 + /* syscall: "clock_settime" ret: "int" args: "clockid_t" "struct linux_timespec *" */ #define LINUX_SYS_clock_settime 227 diff --git a/sys/compat/linux/arch/amd64/linux_syscallargs.h b/sys/compat/linux/arch/amd64/linux_syscallargs.h index 31979f4290a8..c5142e2f18a2 100644 --- a/sys/compat/linux/arch/amd64/linux_syscallargs.h +++ b/sys/compat/linux/arch/amd64/linux_syscallargs.h @@ -1,4 +1,4 @@ -/* $NetBSD: linux_syscallargs.h,v 1.4 2005/05/22 19:29:40 fvdl Exp $ */ +/* $NetBSD: linux_syscallargs.h,v 1.5 
2005/06/22 15:10:51 manu Exp $ */ /* * System call argument lists. @@ -218,6 +218,8 @@ struct linux_sys_getsockopt_args { struct linux_sys_clone_args { syscallarg(int) flags; syscallarg(void *) stack; + syscallarg(void *) parent_tidptr; + syscallarg(void *) child_tidptr; }; struct linux_sys_execve_args { @@ -608,6 +610,10 @@ struct linux_sys_getdents64_args { syscallarg(unsigned int) count; }; +struct linux_sys_set_tid_address_args { + syscallarg(int *) tid; +}; + struct linux_sys_clock_settime_args { syscallarg(clockid_t) which; syscallarg(struct linux_timespec *) tp; @@ -964,6 +970,8 @@ int linux_sys_time(struct lwp *, void *, register_t *); int linux_sys_getdents64(struct lwp *, void *, register_t *); +int linux_sys_set_tid_address(struct lwp *, void *, register_t *); + int linux_sys_clock_settime(struct lwp *, void *, register_t *); int linux_sys_clock_gettime(struct lwp *, void *, register_t *); diff --git a/sys/compat/linux/arch/amd64/linux_syscalls.c b/sys/compat/linux/arch/amd64/linux_syscalls.c index 12631aa9d35e..b71bca52d3ff 100644 --- a/sys/compat/linux/arch/amd64/linux_syscalls.c +++ b/sys/compat/linux/arch/amd64/linux_syscalls.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux_syscalls.c,v 1.4 2005/05/22 19:29:40 fvdl Exp $ */ +/* $NetBSD: linux_syscalls.c,v 1.5 2005/06/22 15:10:51 manu Exp $ */ /* * System call names. 
@@ -8,7 +8,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_syscalls.c,v 1.4 2005/05/22 19:29:40 fvdl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_syscalls.c,v 1.5 2005/06/22 15:10:51 manu Exp $"); #if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT) @@ -248,7 +248,7 @@ const char *const linux_syscallnames[] = { "#215 (unimplemented epoll_wait_old)", /* 215 = unimplemented epoll_wait_old */ "#216 (unimplemented remap_file_pages)", /* 216 = unimplemented remap_file_pages */ "getdents64", /* 217 = getdents64 */ - "#218 (unimplemented set_tid_address)", /* 218 = unimplemented set_tid_address */ + "set_tid_address", /* 218 = set_tid_address */ "#219 (unimplemented restart_syscall)", /* 219 = unimplemented restart_syscall */ "#220 (unimplemented semtimedop)", /* 220 = unimplemented semtimedop */ "#221 (unimplemented fadvise64)", /* 221 = unimplemented fadvise64 */ diff --git a/sys/compat/linux/arch/amd64/linux_sysent.c b/sys/compat/linux/arch/amd64/linux_sysent.c index d6fbeb202f72..b5e2e0af2799 100644 --- a/sys/compat/linux/arch/amd64/linux_sysent.c +++ b/sys/compat/linux/arch/amd64/linux_sysent.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux_sysent.c,v 1.4 2005/05/22 19:29:40 fvdl Exp $ */ +/* $NetBSD: linux_sysent.c,v 1.5 2005/06/22 15:10:51 manu Exp $ */ /* * System call switch table. 
@@ -8,7 +8,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_sysent.c,v 1.4 2005/05/22 19:29:40 fvdl Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_sysent.c,v 1.5 2005/06/22 15:10:51 manu Exp $"); #if defined(_KERNEL_OPT) #include "opt_compat_43.h" @@ -142,7 +142,7 @@ struct sysent linux_sysent[] = { linux_sys_setsockopt }, /* 54 = setsockopt */ { 5, s(struct linux_sys_getsockopt_args), 0, linux_sys_getsockopt }, /* 55 = getsockopt */ - { 2, s(struct linux_sys_clone_args), 0, + { 4, s(struct linux_sys_clone_args), 0, linux_sys_clone }, /* 56 = clone */ { 0, 0, 0, sys_fork }, /* 57 = fork */ @@ -466,8 +466,8 @@ struct sysent linux_sysent[] = { linux_sys_nosys }, /* 216 = unimplemented remap_file_pages */ { 3, s(struct linux_sys_getdents64_args), 0, linux_sys_getdents64 }, /* 217 = getdents64 */ - { 0, 0, 0, - linux_sys_nosys }, /* 218 = unimplemented set_tid_address */ + { 1, s(struct linux_sys_set_tid_address_args), 0, + linux_sys_set_tid_address }, /* 218 = set_tid_address */ { 0, 0, 0, linux_sys_nosys }, /* 219 = unimplemented restart_syscall */ { 0, 0, 0, diff --git a/sys/compat/linux/arch/amd64/syscalls.master b/sys/compat/linux/arch/amd64/syscalls.master index 4e0960e6462e..3eb452e1d43c 100644 --- a/sys/compat/linux/arch/amd64/syscalls.master +++ b/sys/compat/linux/arch/amd64/syscalls.master @@ -1,4 +1,4 @@ - $NetBSD: syscalls.master,v 1.4 2005/05/22 19:29:15 fvdl Exp $ + $NetBSD: syscalls.master,v 1.5 2005/06/22 15:10:51 manu Exp $ ; @(#)syscalls.master 8.1 (Berkeley) 7/19/93 @@ -153,7 +153,8 @@ int optname, void *optval, int optlen); } 55 STD { int linux_sys_getsockopt(int s, int level, \ int optname, void *optval, int *optlen); } -56 STD { int linux_sys_clone(int flags, void *stack); } +56 STD { int linux_sys_clone(int flags, void *stack, \ + void *parent_tidptr, void *child_tidptr); } 57 NOARGS { int sys_fork(void); } 58 NOARGS { int sys___vfork14(void); } 59 STD { int linux_sys_execve(const char *path, char **argp, \ @@ -365,7 +366,7 @@ 216 UNIMPL 
remap_file_pages 217 STD { int linux_sys_getdents64(int fd, \ struct linux_dirent64 *dent, unsigned int count); } -218 UNIMPL set_tid_address +218 STD { int linux_sys_set_tid_address(int *tid); } 219 UNIMPL restart_syscall 220 UNIMPL semtimedop 221 UNIMPL fadvise64 diff --git a/sys/compat/linux/common/linux_emuldata.h b/sys/compat/linux/common/linux_emuldata.h index d80108686eb6..c6180022f95f 100644 --- a/sys/compat/linux/common/linux_emuldata.h +++ b/sys/compat/linux/common/linux_emuldata.h @@ -1,4 +1,4 @@ -/* $NetBSD: linux_emuldata.h,v 1.7 2005/02/26 23:10:19 perry Exp $ */ +/* $NetBSD: linux_emuldata.h,v 1.8 2005/06/22 15:10:51 manu Exp $ */ /*- * Copyright (c) 1998,2002 The NetBSD Foundation, Inc. @@ -48,6 +48,7 @@ struct linux_emuldata_shared { caddr_t p_break; /* Processes' idea of break */ int refs; + pid_t group_pid; /* PID of Linux process (group of threads) */ }; struct linux_emuldata { @@ -57,6 +58,10 @@ struct linux_emuldata { int debugreg[8]; /* GDB information for ptrace - for use, */ /* see ../arch/i386/linux_ptrace.c */ struct linux_emuldata_shared *s; + int *child_set_tid; /* in clone(): Child's TID to set on clone */ + int *child_clear_tid; /* in clone(): Child's TID to clear on exit */ + int *set_tid; /* in clone(): Own TID to set on clone */ + int *clear_tid; /* Own TID to clear on exit */ }; #endif /* !_COMMON_LINUX_EMULDATA_H */ diff --git a/sys/compat/linux/common/linux_exec.c b/sys/compat/linux/common/linux_exec.c index 8f5b8a108948..67892d02dc1b 100644 --- a/sys/compat/linux/common/linux_exec.c +++ b/sys/compat/linux/common/linux_exec.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux_exec.c,v 1.76 2005/06/02 16:54:52 tsutsui Exp $ */ +/* $NetBSD: linux_exec.c,v 1.77 2005/06/22 15:10:51 manu Exp $ */ /*- * Copyright (c) 1994, 1995, 1998, 2000 The NetBSD Foundation, Inc. 
@@ -38,7 +38,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.76 2005/06/02 16:54:52 tsutsui Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_exec.c,v 1.77 2005/06/22 15:10:51 manu Exp $"); #include #include @@ -81,6 +81,8 @@ static void linux_e_proc_fork __P((struct proc *, struct proc *, int)); static void linux_e_proc_exit __P((struct proc *)); static void linux_e_proc_init __P((struct proc *, struct proc *, int)); +static void linux_userret_settid __P((struct lwp *, void *)); + /* * Execve(2). Just check the alternate emulation path, and pass it on * to the NetBSD execve(). @@ -159,6 +161,7 @@ linux_e_proc_init(p, parent, forkflags) { struct linux_emuldata *e = p->p_emuldata; struct linux_emuldata_shared *s; + struct linux_emuldata *ep = NULL; if (!e) { /* allocate new Linux emuldata */ @@ -172,9 +175,17 @@ linux_e_proc_init(p, parent, forkflags) memset(e, '\0', sizeof(struct linux_emuldata)); + if (parent) + ep = parent->p_emuldata; + if (forkflags & FORK_SHAREVM) { - struct linux_emuldata *e2 = parent->p_emuldata; - s = e2->s; +#ifdef DIAGNOSTIC + if (ep == NULL) { + killproc(p, "FORK_SHAREVM while emuldata is NULL\n"); + return; + } +#endif + s = ep->s; s->refs++; } else { struct vmspace *vm; @@ -193,9 +204,33 @@ linux_e_proc_init(p, parent, forkflags) vm = (parent) ? parent->p_vmspace : p->p_vmspace; s->p_break = vm->vm_daddr + ctob(vm->vm_dsize); + /* + * Linux threads are emulated as NetBSD processes (not lwp) + * We use native PID for Linux TID. The Linux TID is the + * PID of the first process in the group. It is stored + * here + */ + s->group_pid = p->p_pid; } e->s = s; + + /* + * initialize TID pointers. ep->child_clear_tid and + * ep->child_set_tid will not be used beyond this point. 
+ */ + e->child_clear_tid = NULL; + e->child_set_tid = NULL; + if (ep != NULL) { + e->clear_tid = ep->child_clear_tid; + e->set_tid = ep->child_set_tid; + ep->child_clear_tid = NULL; + ep->child_set_tid = NULL; + } else { + e->clear_tid = NULL; + e->set_tid = NULL; + } + p->p_emuldata = e; } @@ -222,6 +257,23 @@ linux_e_proc_exit(p) { struct linux_emuldata *e = p->p_emuldata; + /* Emulate LINUX_CLONE_CHILD_CLEARTID */ + if (e->clear_tid != NULL) { + int error; + int null = 0; + + if ((error = copyout(&null, + e->clear_tid, + sizeof(null))) != 0) + printf("linux_e_proc_exit: cannot clear TID\n"); + +#ifdef notyet /* Not yet implemented */ + if ((error = linux_sys_futex(e->clear_tid, + LINUX_FUTEX_WAKE, 1, NULL, NULL, 0)) != 0) + printf("linux_e_proc_exit: linux_sys_futex failed\n"); +#endif + } + /* free Linux emuldata and set the pointer to null */ e->s->refs--; if (e->s->refs == 0) @@ -238,6 +290,8 @@ linux_e_proc_fork(p, parent, forkflags) struct proc *p, *parent; int forkflags; { + struct linux_emuldata *e; + /* * The new process might share some vmspace-related stuff * with parent, depending on fork flags (CLONE_VM et.al). @@ -246,4 +300,36 @@ linux_e_proc_fork(p, parent, forkflags) */ p->p_emuldata = NULL; linux_e_proc_init(p, parent, forkflags); + + /* + * Emulate LINUX_CLONE_CHILD_SETTID: This cannot be done + * right now because the child VM is not set up. We will + * do it at userret time. 
+ */ + e = p->p_emuldata; + if (e->set_tid != NULL) + p->p_userret = (*linux_userret_settid); + + return; +} + +static void +linux_userret_settid(l, arg) + struct lwp *l; + void *arg; +{ + struct proc *p = l->l_proc; + struct linux_emuldata *led = p->p_emuldata; + int error; + + p->p_userret = NULL; + + /* Emulate LINUX_CLONE_CHILD_SETTID */ + if (led->set_tid != NULL) { + if ((error = copyout(&p->p_pid, + led->set_tid, sizeof(p->p_pid))) != 0) + printf("linux_userret_settid: cannot set TID\n"); + } + + return; } diff --git a/sys/compat/linux/common/linux_sched.c b/sys/compat/linux/common/linux_sched.c index 74eaef7caa59..f7d5c6701044 100644 --- a/sys/compat/linux/common/linux_sched.c +++ b/sys/compat/linux/common/linux_sched.c @@ -1,4 +1,4 @@ -/* $NetBSD: linux_sched.c,v 1.18 2004/09/10 22:22:20 wiz Exp $ */ +/* $NetBSD: linux_sched.c,v 1.19 2005/06/22 15:10:51 manu Exp $ */ /*- * Copyright (c) 1999 The NetBSD Foundation, Inc. @@ -42,7 +42,7 @@ */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.18 2004/09/10 22:22:20 wiz Exp $"); +__KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.19 2005/06/22 15:10:51 manu Exp $"); #include #include @@ -56,6 +56,7 @@ __KERNEL_RCSID(0, "$NetBSD: linux_sched.c,v 1.18 2004/09/10 22:22:20 wiz Exp $") #include #include +#include #include @@ -70,8 +71,16 @@ linux_sys_clone(l, v, retval) struct linux_sys_clone_args /* { syscallarg(int) flags; syscallarg(void *) stack; +#ifdef __amd64__ + syscallarg(void *) parent_tidptr; + syscallarg(void *) child_tidptr; +#endif } */ *uap = v; int flags, sig; + int error; +#ifdef __amd64__ + struct linux_emuldata *led; +#endif /* * We don't support the Linux CLONE_PID or CLONE_PTRACE flags. 
@@ -108,14 +117,44 @@ linux_sys_clone(l, v, retval) return (EINVAL); sig = linux_to_native_signo[sig]; +#ifdef __amd64__ + led = (struct linux_emuldata *)l->l_proc->p_emuldata; + + if (SCARG(uap, flags) & LINUX_CLONE_PARENT_SETTID) { + if (SCARG(uap, parent_tidptr) == NULL) { + printf("linux_sys_clone: NULL parent_tidptr\n"); + return EINVAL; + } + + if ((error = copyout(&l->l_proc->p_pid, + SCARG(uap, parent_tidptr), + sizeof(l->l_proc->p_pid))) != 0) + return error; + } + + /* CLONE_CHILD_CLEARTID: TID clear in the child on exit() */ + if (SCARG(uap, flags) & LINUX_CLONE_CHILD_CLEARTID) + led->child_clear_tid = SCARG(uap, child_tidptr); + else + led->child_clear_tid = NULL; + + /* CLONE_CHILD_SETTID: TID set in the child on clone() */ + if (SCARG(uap, flags) & LINUX_CLONE_CHILD_SETTID) + led->child_set_tid = SCARG(uap, child_tidptr); + else + led->child_set_tid = NULL; +#endif /* * Note that Linux does not provide a portable way of specifying * the stack area; the caller must know if the stack grows up * or down. So, we pass a stack size of 0, so that the code * that makes this adjustment is a noop. */ - return (fork1(l, flags, sig, SCARG(uap, stack), 0, - NULL, NULL, retval, NULL)); + if ((error = fork1(l, flags, sig, SCARG(uap, stack), 0, + NULL, NULL, retval, NULL)) != 0) + return error; + + return 0; } int @@ -367,3 +406,24 @@ linux_sys_exit_group(l, v, retval) return 0; } #endif /* !__m68k__ */ + +#ifdef __amd64__ +int +linux_sys_set_tid_address(l, v, retval) + struct lwp *l; + void *v; + register_t *retval; +{ + struct linux_sys_set_tid_address_args /* { + syscallarg(int *) tidptr; + } */ *uap = v; + struct linux_emuldata *led; + + led = (struct linux_emuldata *)l->l_proc->p_emuldata; + led->clear_tid = SCARG(uap, tid); + + *retval = l->l_proc->p_pid; + + return 0; +} +#endif /* __amd64__ */