/*	$NetBSD: kern_exec.c,v 1.453 2017/11/13 22:01:45 christos Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (C) 1993, 1994, 1996 Christopher G. Demetriou
 * Copyright (C) 1992 Wolfgang Solfrank.
 * Copyright (C) 1992 TooLs GmbH.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_exec.c,v 1.453 2017/11/13 22:01:45 christos Exp $");

#include "opt_exec.h"
#include "opt_execfmt.h"
#include "opt_ktrace.h"
#include "opt_modular.h"
#include "opt_syscall_debug.h"
#include "veriexec.h"
#include "opt_pax.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/kmem.h>
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/file.h>
#include <sys/acct.h>
#include <sys/atomic.h>
#include <sys/exec.h>
#include <sys/ktrace.h>
#include <sys/uidinfo.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <sys/ras.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/kauth.h>
#include <sys/lwpctl.h>
#include <sys/pax.h>
#include <sys/cpu.h>
#include <sys/module.h>
#include <sys/syscallvar.h>
#include <sys/syscallargs.h>
#if NVERIEXEC > 0
#include <sys/verified_exec.h>
#endif /* NVERIEXEC > 0 */
|
DTrace: Add an SDT (Statically Defined Tracing) provider framework, and
implement most of the proc provider. Adds proc:::create, exec,
exec_success, exec_faillure, signal_send, signal_discard, signal_handle,
lwp_create, lwp_start, lwp_exit.
2010-03-02 00:10:13 +03:00
|
|
|
#include <sys/sdt.h>
|
2012-02-12 03:16:15 +04:00
|
|
|
#include <sys/spawn.h>
|
|
|
|
#include <sys/prot.h>
|
First step of random number subsystem rework described in
<20111022023242.BA26F14A158@mail.netbsd.org>. This change includes
the following:
An initial cleanup and minor reorganization of the entropy pool
code in sys/dev/rnd.c and sys/dev/rndpool.c. Several bugs are
fixed. Some effort is made to accumulate entropy more quickly at
boot time.
A generic interface, "rndsink", is added, for stream generators to
request that they be re-keyed with good quality entropy from the pool
as soon as it is available.
The arc4random()/arc4randbytes() implementation in libkern is
adjusted to use the rndsink interface for rekeying, which helps
address the problem of low-quality keys at boot time.
An implementation of the FIPS 140-2 statistical tests for random
number generator quality is provided (libkern/rngtest.c). This
is based on Greg Rose's implementation from Qualcomm.
A new random stream generator, nist_ctr_drbg, is provided. It is
based on an implementation of the NIST SP800-90 CTR_DRBG by
Henric Jungheim. This generator users AES in a modified counter
mode to generate a backtracking-resistant random stream.
An abstraction layer, "cprng", is provided for in-kernel consumers
of randomness. The arc4random/arc4randbytes API is deprecated for
in-kernel use. It is replaced by "cprng_strong". The current
cprng_fast implementation wraps the existing arc4random
implementation. The current cprng_strong implementation wraps the
new CTR_DRBG implementation. Both interfaces are rekeyed from
the entropy pool automatically at intervals justifiable from best
current cryptographic practice.
In some quick tests, cprng_fast() is about the same speed as
the old arc4randbytes(), and cprng_strong() is about 20% faster
than rnd_extract_data(). Performance is expected to improve.
The AES code in src/crypto/rijndael is no longer an optional
kernel component, as it is required by cprng_strong, which is
not an optional kernel component.
The entropy pool output is subjected to the rngtest tests at
startup time; if it fails, the system will reboot. There is
approximately a 3/10000 chance of a false positive from these
tests. Entropy pool _input_ from hardware random numbers is
subjected to the rngtest tests at attach time, as well as the
FIPS continuous-output test, to detect bad or stuck hardware
RNGs; if any are detected, they are detached, but the system
continues to run.
A problem with rndctl(8) is fixed -- datastructures with
pointers in arrays are no longer passed to userspace (this
was not a security problem, but rather a major issue for
compat32). A new kernel will require a new rndctl.
The sysctl kern.arandom() and kern.urandom() nodes are hooked
up to the new generators, but the /dev/*random pseudodevices
are not, yet.
Manual pages for the new kernel interfaces are forthcoming.
2011-11-20 02:51:18 +04:00
|
|
|
#include <sys/cprng.h>
|
1994-10-20 07:22:35 +03:00
|
|
|
|
1998-02-05 10:59:28 +03:00
|
|
|
#include <uvm/uvm_extern.h>
|
|
|
|
|
1994-06-29 10:29:24 +04:00
|
|
|
#include <machine/reg.h>
|
|
|
|
|
2007-04-22 12:29:55 +04:00
|
|
|
#include <compat/common/compat_util.h>
|
|
|
|
|
2013-11-14 16:07:11 +04:00
|
|
|
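/*
 * If the port did not define MD_TOPDOWN_INIT, derive a default: on
 * __USE_TOPDOWN_VM ports, mark the exec package for top-down VA layout.
 */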
#ifndef MD_TOPDOWN_INIT
#ifdef __USE_TOPDOWN_VM
#define	MD_TOPDOWN_INIT(epp)	(epp)->ep_flags |= EXEC_TOPDOWN_VM
#else
#define	MD_TOPDOWN_INIT(epp)
#endif
#endif

struct execve_data;

extern int user_va0_disable;

static size_t calcargs(struct execve_data * restrict, const size_t);
static size_t calcstack(struct execve_data * restrict, const size_t);
static int copyoutargs(struct execve_data * restrict, struct lwp *,
    char * const);
static int copyoutpsstrs(struct execve_data * restrict, struct proc *);
static int copyinargs(struct execve_data * restrict, char * const *,
    char * const *, execve_fetch_element_t, char **);
static int copyinargstrs(struct execve_data * restrict, char * const *,
    execve_fetch_element_t, char **, size_t *, void (*)(const void *, size_t));
static int exec_sigcode_map(struct proc *, const struct emul *);

#if defined(DEBUG) && !defined(DEBUG_EXEC)
#define DEBUG_EXEC
#endif
#ifdef DEBUG_EXEC
#define DPRINTF(a) printf a
#define COPYPRINTF(s, a, b) printf("%s, %d: copyout%s @%p %zu\n", __func__, \
    __LINE__, (s), (a), (b))
static void dump_vmcmds(const struct exec_package * const, size_t, int);
#define DUMPVMCMDS(p, x, e) do { dump_vmcmds((p), (x), (e)); } while (0)
#else
#define DPRINTF(a)
#define COPYPRINTF(s, a, b)
#define DUMPVMCMDS(p, x, e) do {} while (0)
#endif /* DEBUG_EXEC */

/*
 * DTrace SDT provider definitions
 */
SDT_PROVIDER_DECLARE(proc);
SDT_PROBE_DEFINE1(proc, kernel, , exec, "char *");
SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");
SDT_PROBE_DEFINE1(proc, kernel, , exec__failure, "int");

/*
 * Exec function switch:
 *
 * Note that each makecmds function is responsible for loading the
 * exec package with the necessary functions for any exec-type-specific
 * handling.
 *
 * Functions for specific exec types should be defined in their own
 * header file.
 */
static const struct execsw **execsw = NULL;
static int nexecs;

u_int exec_maxhdrsz;	/* must not be static - used by netbsd32 */

/* list of dynamically loaded execsw entries */
static LIST_HEAD(execlist_head, exec_entry) ex_head =
    LIST_HEAD_INITIALIZER(ex_head);
struct exec_entry {
	LIST_ENTRY(exec_entry)	ex_list;
	SLIST_ENTRY(exec_entry)	ex_slist;
	const struct execsw	*ex_sw;
};

#ifndef __HAVE_SYSCALL_INTERN
void	syscall(void);
#endif

/* NetBSD autoloadable syscalls */
#ifdef MODULAR
#include <kern/syscalls_autoload.c>
#endif

/* NetBSD emul struct */
struct emul emul_netbsd = {
	.e_name =		"netbsd",
#ifdef EMUL_NATIVEROOT
	.e_path =		EMUL_NATIVEROOT,
#else
	.e_path =		NULL,
#endif
#ifndef __HAVE_MINIMAL_EMUL
	.e_flags =		EMUL_HAS_SYS___syscall,
	.e_errno =		NULL,
	.e_nosys =		SYS_syscall,
	.e_nsysent =		SYS_NSYSENT,
#endif
#ifdef MODULAR
	.e_sc_autoload =	netbsd_syscalls_autoload,
#endif
	.e_sysent =		sysent,
#ifdef SYSCALL_DEBUG
	.e_syscallnames =	syscallnames,
#else
	.e_syscallnames =	NULL,
#endif
	.e_sendsig =		sendsig,
	.e_trapsignal =		trapsignal,
	.e_tracesig =		NULL,
	.e_sigcode =		NULL,
	.e_esigcode =		NULL,
	.e_sigobject =		NULL,
	.e_setregs =		setregs,
	.e_proc_exec =		NULL,
	.e_proc_fork =		NULL,
	.e_proc_exit =		NULL,
	.e_lwp_fork =		NULL,
	.e_lwp_exit =		NULL,
#ifdef __HAVE_SYSCALL_INTERN
	.e_syscall_intern =	syscall_intern,
#else
	.e_syscall =		syscall,
#endif
	.e_sysctlovly =		NULL,
	.e_fault =		NULL,
	.e_vm_default_addr =	uvm_default_mapaddr,
	.e_usertrap =		NULL,
	.e_ucsize =		sizeof(ucontext_t),
	.e_startlwp =		startlwp
};

/*
 * Exec lock. Used to control access to execsw[] structures.
 * This must not be static so that netbsd32 can access it, too.
 */
krwlock_t exec_lock;

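/* Serializes creation of the shared sigcode object (see exec_sigcode_map()). */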
static kmutex_t sigobject_lock;

/*
 * Data used between a loadvm and execve part of an "exec" operation
 */
struct execve_data {
	struct exec_package	ed_pack;
	struct pathbuf		*ed_pathbuf;
	struct vattr		ed_attr;
	struct ps_strings	ed_arginfo;
	char			*ed_argp;
	const char		*ed_pathstring;
	char			*ed_resolvedpathbuf;
	size_t			ed_ps_strings_sz;
	int			ed_szsigcode;
	size_t			ed_argslen;
	long			ed_argc;
	long			ed_envc;
};

/*
 * data passed from parent lwp to child during a posix_spawn()
 */
struct spawn_exec_data {
	struct execve_data	sed_exec;
	struct posix_spawn_file_actions
				*sed_actions;
	struct posix_spawnattr	*sed_attrs;
	struct proc		*sed_parent;
	kcondvar_t		sed_cv_child_ready;
	kmutex_t		sed_mtx_child;
	int			sed_error;
	volatile uint32_t	sed_refcnt;
};
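
/*
 * KVA for the execargs buffers is carved out of a dedicated exec_map
 * submap at boot, so that the NCARGS-sized allocations cannot fail due
 * to kernel_map KVA fragmentation (see PR kern/45718).
 */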
static struct vm_map *exec_map;
static struct pool exec_pool;

static void *
exec_pool_alloc(struct pool *pp, int flags)
{

	return (void *)uvm_km_alloc(exec_map, NCARGS, 0,
	    UVM_KMF_PAGEABLE | UVM_KMF_WAITVA);
}

static void
exec_pool_free(struct pool *pp, void *addr)
{

	uvm_km_free(exec_map, (vaddr_t)addr, NCARGS, UVM_KMF_PAGEABLE);
}
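
/*
 * Pool allocator that hands out whole NCARGS-sized, pageable argument
 * buffers backed by exec_map.
 */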
static struct pool_allocator exec_palloc = {
	.pa_alloc = exec_pool_alloc,
	.pa_free = exec_pool_free,
	.pa_pagesz = NCARGS
};

/*
 * check exec:
 * given an "executable" described in the exec package's namei info,
 * see what we can do with it.
 *
 * ON ENTRY:
 *	exec package with appropriate namei info
 *	lwp pointer of exec'ing lwp
 *	NO SELF-LOCKED VNODES
 *
 * ON EXIT:
 *	error:	nothing held, etc.  exec header still allocated.
 *	ok:	filled exec package, executable's vnode (unlocked).
 *
 * EXEC SWITCH ENTRY:
 * 	Locked vnode to check, exec package, proc.
 *
 * EXEC SWITCH EXIT:
 *	ok:	return 0, filled exec package, executable's vnode (unlocked).
 *	error:	destructive:
 *			everything deallocated except exec header.
 *		non-destructive:
 *			error code, executable's vnode (unlocked),
 *			exec header unmodified.
 */
int
/*ARGSUSED*/
check_exec(struct lwp *l, struct exec_package *epp, struct pathbuf *pb)
{
	int		error, i;
	struct vnode	*vp;
	struct nameidata nd;
	size_t		resid;

#if 1
	// grab the absolute pathbuf here before namei() trashes it.
	pathbuf_copystring(pb, epp->ep_resolvedname, PATH_MAX);
#endif
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | TRYEMULROOT, pb);

	/* first get the vnode */
	if ((error = namei(&nd)) != 0)
		return error;
	epp->ep_vp = vp = nd.ni_vp;
#if 0
	/*
	 * XXX: can't use nd.ni_pnbuf, because although pb contains an
	 * absolute path, nd.ni_pnbuf does not if the path contains symlinks.
	 */
	/* normally this can't fail */
	error = copystr(nd.ni_pnbuf, epp->ep_resolvedname, PATH_MAX, NULL);
	KASSERT(error == 0);
#endif

#ifdef DIAGNOSTIC
	/* paranoia (take this out once namei stuff stabilizes) */
	memset(nd.ni_pnbuf, '~', PATH_MAX);
#endif

	/* check access and type */
	if (vp->v_type != VREG) {
		error = EACCES;
		goto bad1;
	}
	if ((error = VOP_ACCESS(vp, VEXEC, l->l_cred)) != 0)
		goto bad1;

	/* get attributes */
	if ((error = VOP_GETATTR(vp, epp->ep_vap, l->l_cred)) != 0)
		goto bad1;

	/* Check mount point */
	if (vp->v_mount->mnt_flag & MNT_NOEXEC) {
		error = EACCES;
		goto bad1;
	}
	if (vp->v_mount->mnt_flag & MNT_NOSUID)
		epp->ep_vap->va_mode &= ~(S_ISUID | S_ISGID);

	/* try to open it */
	if ((error = VOP_OPEN(vp, FREAD, l->l_cred)) != 0)
		goto bad1;

	/* unlock vp, since we need it unlocked from here on out. */
	VOP_UNLOCK(vp);

#if NVERIEXEC > 0
	error = veriexec_verify(l, vp, epp->ep_resolvedname,
	    epp->ep_flags & EXEC_INDIR ? VERIEXEC_INDIRECT : VERIEXEC_DIRECT,
	    NULL);
	if (error)
		goto bad2;
#endif /* NVERIEXEC > 0 */

#ifdef PAX_SEGVGUARD
	error = pax_segvguard(l, vp, epp->ep_resolvedname, false);
	if (error)
		goto bad2;
#endif /* PAX_SEGVGUARD */

	/* now we have the file, get the exec header */
	error = vn_rdwr(UIO_READ, vp, epp->ep_hdr, epp->ep_hdrlen, 0,
	    UIO_SYSSPACE, 0, l->l_cred, &resid, NULL);
	if (error)
		goto bad2;
	epp->ep_hdrvalid = epp->ep_hdrlen - resid;

	/*
	 * Set up default address space limits.  Can be overridden
	 * by individual exec packages.
	 */
	epp->ep_vm_minaddr = exec_vm_minaddr(VM_MIN_ADDRESS);
	epp->ep_vm_maxaddr = VM_MAXUSER_ADDRESS;

	/*
	 * set up the vmcmds for creation of the process
	 * address space
	 */
	error = ENOEXEC;
	for (i = 0; i < nexecs; i++) {
		int newerror;

		epp->ep_esch = execsw[i];
		newerror = (*execsw[i]->es_makecmds)(l, epp);

		if (!newerror) {
			/* Seems ok: check that entry point is not too high */
			if (epp->ep_entry > epp->ep_vm_maxaddr) {
#ifdef DIAGNOSTIC
				printf("%s: rejecting %p due to "
				    "too high entry address (> %p)\n",
				    __func__, (void *)epp->ep_entry,
				    (void *)epp->ep_vm_maxaddr);
#endif
				error = ENOEXEC;
				break;
			}
			/* Seems ok: check that entry point is not too low */
			if (epp->ep_entry < epp->ep_vm_minaddr) {
#ifdef DIAGNOSTIC
				printf("%s: rejecting %p due to "
				    "too low entry address (< %p)\n",
				    __func__, (void *)epp->ep_entry,
				    (void *)epp->ep_vm_minaddr);
#endif
				error = ENOEXEC;
				break;
			}

			/* check limits */
			if ((epp->ep_tsize > MAXTSIZ) ||
			    (epp->ep_dsize > (u_quad_t)l->l_proc->p_rlimit
			    [RLIMIT_DATA].rlim_cur)) {
#ifdef DIAGNOSTIC
				printf("%s: rejecting due to "
				    "limits (t=%llu > %llu || d=%llu > %llu)\n",
				    __func__,
				    (unsigned long long)epp->ep_tsize,
				    (unsigned long long)MAXTSIZ,
				    (unsigned long long)epp->ep_dsize,
				    (unsigned long long)
				    l->l_proc->p_rlimit[RLIMIT_DATA].rlim_cur);
#endif
				error = ENOMEM;
				break;
			}
			return 0;
		}

		/*
		 * Reset all the fields that may have been modified by the
		 * loader.
		 */
		KASSERT(epp->ep_emul_arg == NULL);
		if (epp->ep_emul_root != NULL) {
			vrele(epp->ep_emul_root);
			epp->ep_emul_root = NULL;
		}
		if (epp->ep_interp != NULL) {
			vrele(epp->ep_interp);
			epp->ep_interp = NULL;
		}
		epp->ep_pax_flags = 0;

		/* make sure the first "interesting" error code is saved. */
		if (error == ENOEXEC)
			error = newerror;

		if (epp->ep_flags & EXEC_DESTR)
			/* Error from "#!" code, tidied up by recursive call */
			return error;
	}

	/* not found, error */

	/*
	 * free any vmspace-creation commands,
	 * and release their references
	 */
	kill_vmcmds(&epp->ep_vmcmds);

bad2:
	/*
	 * close and release the vnode, restore the old one, free the
	 * pathname buf, and punt.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(vp, FREAD, l->l_cred);
	vput(vp);
	return error;

bad1:
	/*
	 * free the namei pathname buffer, and put the vnode
	 * (which we don't yet have open).
	 */
	vput(vp);				/* was still locked */
	return error;
}

#ifdef __MACHINE_STACK_GROWS_UP
#define STACK_PTHREADSPACE NBPG
#else
#define STACK_PTHREADSPACE 0
#endif
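
/*
 * Fetch one pointer element from the user-space argv/envp array; used as
 * the default execve_fetch_element_t for sys_execve().
 */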
static int
execve_fetch_element(char * const *array, size_t index, char **value)
{
	return copyin(array + index, value, sizeof(*value));
}

/*
 * exec system call
 */
int
sys_execve(struct lwp *l, const struct sys_execve_args *uap, register_t *retval)
{
	/* {
		syscallarg(const char *)	path;
		syscallarg(char * const *)	argp;
		syscallarg(char * const *)	envp;
	} */

	return execve1(l, SCARG(uap, path), SCARG(uap, argp),
	    SCARG(uap, envp), execve_fetch_element);
}
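
/*
 * fexecve(2) is not implemented yet; fail with ENOSYS.
 */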
int
sys_fexecve(struct lwp *l, const struct sys_fexecve_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(int)			fd;
		syscallarg(char * const *)	argp;
		syscallarg(char * const *)	envp;
	} */

	return ENOSYS;
}

/*
 * Load modules to try and execute an image that we do not understand.
 * If no execsw entries are present, we load those likely to be needed
 * in order to run native images only.  Otherwise, we autoload all
 * possible modules that could let us run the binary. XXX lame
 */
static void
exec_autoload(void)
{
#ifdef MODULAR
	static const char * const native[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		NULL
	};
	static const char * const compat[] = {
		"exec_elf32",
		"exec_elf64",
		"exec_script",
		"exec_aout",
		"exec_coff",
		"exec_ecoff",
		"compat_aoutm68k",
		"compat_netbsd32",
		"compat_sunos",
		"compat_sunos32",
		"compat_ultrix",
		NULL
	};
	char const * const *list;
	int i;

	list = (nexecs == 0 ? native : compat);
	for (i = 0; list[i] != NULL; i++) {
		if (module_autoload(list[i], MODULE_CLASS_EXEC) != 0) {
			continue;
		}
		yield();
	}
#endif
}
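
/*
 * Copy the user-supplied path into a kernel pathbuf and make it absolute:
 * a relative path gets the current working directory prepended via
 * getcwd_common().  On success, *offs gives the offset of the original
 * name within the resulting string.
 */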
static int
makepathbuf(struct lwp *l, const char *upath, struct pathbuf **pbp,
    size_t *offs)
{
	char *path, *bp;
	size_t len, tlen;
	int error;
	struct cwdinfo *cwdi;

	path = PNBUF_GET();
	error = copyinstr(upath, path, MAXPATHLEN, &len);
	if (error) {
		PNBUF_PUT(path);
		DPRINTF(("%s: copyin path @%p %d\n", __func__, upath, error));
		return error;
	}

	if (path[0] == '/') {
		*offs = 0;
		goto out;
	}

	len++;
	if (len + 1 >= MAXPATHLEN)
		goto out;
	bp = path + MAXPATHLEN - len;
	memmove(bp, path, len);
	*(--bp) = '/';

	cwdi = l->l_proc->p_cwdi;
	rw_enter(&cwdi->cwdi_lock, RW_READER);
	error = getcwd_common(cwdi->cwdi_cdir, NULL, &bp, path, MAXPATHLEN / 2,
	    GETCWD_CHECK_ACCESS, l);
	rw_exit(&cwdi->cwdi_lock);

	if (error) {
		DPRINTF(("%s: getcwd_common path %s %d\n", __func__, path,
		    error));
		goto out;
	}
	tlen = path + MAXPATHLEN - bp;

	memmove(path, bp, tlen);
	path[tlen] = '\0';
	*offs = tlen - len;
out:
	*pbp = pathbuf_assimilate(path);
	return 0;
}

vaddr_t
exec_vm_minaddr(vaddr_t va_min)
{
	/*
	 * Increase va_min if we don't want NULL to be mappable by the
	 * process.
	 */
#define VM_MIN_GUARD	PAGE_SIZE
	if (user_va0_disable && (va_min < VM_MIN_GUARD))
		return VM_MIN_GUARD;
	return va_min;
}
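
/*
 * Prepare an exec: resolve the path, run check_exec() and copy in the
 * argument and environment strings.  The state needed to commit the
 * exec is collected in *data for a later execve_runproc() call.
 */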
static int
execve_loadvm(struct lwp *l, const char *path, char * const *args,
    char * const *envs, execve_fetch_element_t fetch_element,
    struct execve_data * restrict data)
{
	struct exec_package	* const epp = &data->ed_pack;
	int			error;
	struct proc		*p;
	char			*dp;
	u_int			modgen;
	size_t			offs = 0;	// XXX: GCC

	KASSERT(data != NULL);

	p = l->l_proc;
	modgen = 0;

	SDT_PROBE(proc, kernel, , exec, path, 0, 0, 0, 0);

	/*
	 * Check if we have exceeded our number of processes limit.
	 * This is so that we handle the case where a root daemon
	 * forked, ran setuid to become the desired user and is trying
	 * to exec. The obvious place to do the reference counting check
	 * is setuid(), but we don't do the reference counting check there
	 * like other OS's do because then all the programs that use setuid()
	 * must be modified to check the return code of setuid() and exit().
	 * It is dangerous to make setuid() fail, because it fails open and
	 * the program will continue to run as root. If we make it succeed
	 * and return an error code, again we are not enforcing the limit.
	 * The best place to enforce the limit is here, when the process tries
	 * to execute a new image, because eventually the process will need
	 * to call exec in order to do something useful.
	 */
 retry:
	if (p->p_flag & PK_SUGID) {
		if (kauth_authorize_process(l->l_cred, KAUTH_PROCESS_RLIMIT,
		    p, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
		    &p->p_rlimit[RLIMIT_NPROC],
		    KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
		    chgproccnt(kauth_cred_getuid(l->l_cred), 0) >
		    p->p_rlimit[RLIMIT_NPROC].rlim_cur)
			return EAGAIN;
	}

	/*
	 * Drain existing references and forbid new ones.  The process
	 * should be left alone until we're done here.  This is necessary
	 * to avoid race conditions - e.g. in ptrace() - that might allow
	 * a local user to illicitly obtain elevated privileges.
	 */
	rw_enter(&p->p_reflock, RW_WRITER);

	/*
	 * Init the namei data to point at the user's program name.
	 * This is done here rather than in check_exec(), so that it's
	 * possible to override these settings if any of the makecmd/probe
	 * functions call check_exec() recursively - for example,
	 * see exec_script_makecmds().
	 */
	if ((error = makepathbuf(l, path, &data->ed_pathbuf, &offs)) != 0)
		goto clrflg;
	data->ed_pathstring = pathbuf_stringcopy_get(data->ed_pathbuf);
	data->ed_resolvedpathbuf = PNBUF_GET();

	/*
	 * initialize the fields of the exec package.
	 */
	epp->ep_kname = data->ed_pathstring + offs;
	epp->ep_resolvedname = data->ed_resolvedpathbuf;
	epp->ep_hdr = kmem_alloc(exec_maxhdrsz, KM_SLEEP);
	epp->ep_hdrlen = exec_maxhdrsz;
	epp->ep_hdrvalid = 0;
	epp->ep_emul_arg = NULL;
	epp->ep_emul_arg_free = NULL;
	memset(&epp->ep_vmcmds, 0, sizeof(epp->ep_vmcmds));
	epp->ep_vap = &data->ed_attr;
	epp->ep_flags = (p->p_flag & PK_32) ? EXEC_FROM32 : 0;
	MD_TOPDOWN_INIT(epp);
	epp->ep_emul_root = NULL;
	epp->ep_interp = NULL;
	epp->ep_esch = NULL;
	epp->ep_pax_flags = 0;
	memset(epp->ep_machine_arch, 0, sizeof(epp->ep_machine_arch));

	rw_enter(&exec_lock, RW_READER);

	/* see if we can run it. */
	if ((error = check_exec(l, epp, data->ed_pathbuf)) != 0) {
		if (error != ENOENT && error != EACCES) {
			DPRINTF(("%s: check exec failed for %s, error %d\n",
			    __func__, epp->ep_kname, error));
		}
		goto freehdr;
	}

	/* allocate an argument buffer */
	data->ed_argp = pool_get(&exec_pool, PR_WAITOK);
	KASSERT(data->ed_argp != NULL);
	dp = data->ed_argp;

	if ((error = copyinargs(data, args, envs, fetch_element, &dp)) != 0) {
		goto bad;
	}

	/*
	 * Calculate the new stack size.
	 */

#ifdef __MACHINE_STACK_GROWS_UP
/*
 * copyargs() fills argc/argv/envp from the lower address even on
 * __MACHINE_STACK_GROWS_UP machines.  Reserve a few words just below the SP
 * so that _rtld() can use it.
 */
#define	RTLD_GAP	32
#else
#define	RTLD_GAP	0
#endif

	const size_t argenvstrlen = (char *)ALIGN(dp) - data->ed_argp;

	data->ed_argslen = calcargs(data, argenvstrlen);

	const size_t len = calcstack(data, pax_aslr_stack_gap(epp) + RTLD_GAP);

	if (len > epp->ep_ssize) {
		/* in effect, compare to initial limit */
		DPRINTF(("%s: stack limit exceeded %zu\n", __func__, len));
		error = ENOMEM;
		goto bad;
	}
	/* adjust "active stack depth" for process VSZ */
	epp->ep_ssize = len;

	return 0;

 bad:
	/* free the vmspace-creation commands, and release their references */
	kill_vmcmds(&epp->ep_vmcmds);
	/* kill any opened file descriptor, if necessary */
	if (epp->ep_flags & EXEC_HASFD) {
		epp->ep_flags &= ~EXEC_HASFD;
		fd_close(epp->ep_fd);
	}
	/* close and put the exec'd file */
	vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
	vput(epp->ep_vp);
	pool_put(&exec_pool, data->ed_argp);

 freehdr:
	kmem_free(epp->ep_hdr, epp->ep_hdrlen);
	if (epp->ep_emul_root != NULL)
		vrele(epp->ep_emul_root);
	if (epp->ep_interp != NULL)
		vrele(epp->ep_interp);

	rw_exit(&exec_lock);

	pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
	pathbuf_destroy(data->ed_pathbuf);
	PNBUF_PUT(data->ed_resolvedpathbuf);

 clrflg:
	rw_exit(&p->p_reflock);

	if (modgen != module_gen && error == ENOEXEC) {
		modgen = module_gen;
		exec_autoload();
		goto retry;
	}

	SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
	return error;
}
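
/*
 * Build the new process image by running the vmcmds collected in the
 * exec package, then close the executable's vnode.  VMCMD_RELATIVE
 * commands are rebased on the most recent VMCMD_BASE command.
 */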
static int
execve_dovmcmds(struct lwp *l, struct execve_data * restrict data)
{
	struct exec_package	* const epp = &data->ed_pack;
	struct proc		*p = l->l_proc;
	struct exec_vmcmd	*base_vcp;
	int			error = 0;
	size_t			i;

	/* record proc's vnode, for use by procfs and others */
	if (p->p_textvp)
		vrele(p->p_textvp);
	vref(epp->ep_vp);
	p->p_textvp = epp->ep_vp;

	/* create the new process's VM space by running the vmcmds */
	KASSERTMSG(epp->ep_vmcmds.evs_used != 0, "%s: no vmcmds", __func__);

#ifdef TRACE_EXEC
	DUMPVMCMDS(epp, 0, 0);
#endif

	base_vcp = NULL;

	for (i = 0; i < epp->ep_vmcmds.evs_used && !error; i++) {
		struct exec_vmcmd *vcp;

		vcp = &epp->ep_vmcmds.evs_cmds[i];
		if (vcp->ev_flags & VMCMD_RELATIVE) {
			KASSERTMSG(base_vcp != NULL,
			    "%s: relative vmcmd with no base", __func__);
			KASSERTMSG((vcp->ev_flags & VMCMD_BASE) == 0,
			    "%s: illegal base & relative vmcmd", __func__);
			vcp->ev_addr += base_vcp->ev_addr;
		}
		error = (*vcp->ev_proc)(l, vcp);
		if (error)
			DUMPVMCMDS(epp, i, error);
		if (vcp->ev_flags & VMCMD_BASE)
			base_vcp = vcp;
	}

	/* free the vmspace-creation commands, and release their references */
	kill_vmcmds(&epp->ep_vmcmds);

	vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(epp->ep_vp, FREAD, l->l_cred);
	vput(epp->ep_vp);

	/* if an error happened, deallocate and punt */
	if (error != 0) {
		DPRINTF(("%s: vmcmd %zu failed: %d\n", __func__, i - 1, error));
	}
	return error;
}
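
/*
 * Undo execve_loadvm(): release the vmcmds, any open file descriptor,
 * the exec'd vnode and the argument/path buffers when an exec is aborted.
 */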
static void
execve_free_data(struct execve_data *data)
{
	struct exec_package	* const epp = &data->ed_pack;

	/* free the vmspace-creation commands, and release their references */
	kill_vmcmds(&epp->ep_vmcmds);
	/* kill any opened file descriptor, if necessary */
	if (epp->ep_flags & EXEC_HASFD) {
		epp->ep_flags &= ~EXEC_HASFD;
		fd_close(epp->ep_fd);
	}

	/* close and put the exec'd file */
	vn_lock(epp->ep_vp, LK_EXCLUSIVE | LK_RETRY);
	VOP_CLOSE(epp->ep_vp, FREAD, curlwp->l_cred);
	vput(epp->ep_vp);
	pool_put(&exec_pool, data->ed_argp);

	kmem_free(epp->ep_hdr, epp->ep_hdrlen);
	if (epp->ep_emul_root != NULL)
		vrele(epp->ep_emul_root);
	if (epp->ep_interp != NULL)
		vrele(epp->ep_interp);

	pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
	pathbuf_destroy(data->ed_pathbuf);
	PNBUF_PUT(data->ed_resolvedpathbuf);
}
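
/*
 * Record the resolved (absolute) executable name in the proc: p_comm
 * gets the final path component, p_path the full path.
 */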
static void
pathexec(struct proc *p, const char *resolvedname)
{
	KASSERT(resolvedname[0] == '/');

	/* set command name & other accounting info */
	strlcpy(p->p_comm, strrchr(resolvedname, '/') + 1, sizeof(p->p_comm));

	kmem_strfree(p->p_path);
	p->p_path = kmem_strdupsize(resolvedname, NULL, KM_SLEEP);
}
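
/*
 * Apply set[ug]id semantics for the new image and give the process
 * private, updated credentials.
 */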
/* XXX elsewhere */
static int
credexec(struct lwp *l, struct vattr *attr)
{
	struct proc *p = l->l_proc;
	int error;

	/*
	 * Deal with set[ug]id.  MNT_NOSUID has already been used to disable
	 * s[ug]id.  It's OK to check for PSL_TRACED here as we have blocked
	 * out additional references on the process for the moment.
	 */
	if ((p->p_slflag & PSL_TRACED) == 0 &&
	    (((attr->va_mode & S_ISUID) != 0 &&
	    kauth_cred_geteuid(l->l_cred) != attr->va_uid) ||
	    ((attr->va_mode & S_ISGID) != 0 &&
	    kauth_cred_getegid(l->l_cred) != attr->va_gid))) {
		/*
		 * Mark the process as SUGID before we do
		 * anything that might block.
		 */
		proc_crmod_enter();
		proc_crmod_leave(NULL, NULL, true);

		/* Make sure file descriptors 0..2 are in use. */
		if ((error = fd_checkstd()) != 0) {
			DPRINTF(("%s: fdcheckstd failed %d\n",
			    __func__, error));
			return error;
		}

		/*
		 * Copy the credential so other references don't see our
		 * changes.
		 */
		l->l_cred = kauth_cred_copy(l->l_cred);
#ifdef KTRACE
		/*
		 * If the persistent trace flag isn't set, turn off tracing.
		 */
		if (p->p_tracep) {
			mutex_enter(&ktrace_lock);
			if (!(p->p_traceflag & KTRFAC_PERSISTENT))
				ktrderef(p);
			mutex_exit(&ktrace_lock);
		}
#endif
		if (attr->va_mode & S_ISUID)
			kauth_cred_seteuid(l->l_cred, attr->va_uid);
		if (attr->va_mode & S_ISGID)
			kauth_cred_setegid(l->l_cred, attr->va_gid);
	} else {
		if (kauth_cred_geteuid(l->l_cred) ==
		    kauth_cred_getuid(l->l_cred) &&
		    kauth_cred_getegid(l->l_cred) ==
		    kauth_cred_getgid(l->l_cred))
			p->p_flag &= ~PK_SUGID;
	}

	/*
	 * Copy the credential so other references don't see our changes.
	 * Test to see if this is necessary first, since in the common case
	 * we won't need a private reference.
	 */
	if (kauth_cred_geteuid(l->l_cred) != kauth_cred_getsvuid(l->l_cred) ||
	    kauth_cred_getegid(l->l_cred) != kauth_cred_getsvgid(l->l_cred)) {
		l->l_cred = kauth_cred_copy(l->l_cred);
		kauth_cred_setsvuid(l->l_cred, kauth_cred_geteuid(l->l_cred));
		kauth_cred_setsvgid(l->l_cred, kauth_cred_getegid(l->l_cred));
	}

	/* Update the master credentials. */
	if (l->l_cred != p->p_cred) {
		kauth_cred_t ocred;

		kauth_cred_hold(l->l_cred);
		mutex_enter(p->p_lock);
		ocred = p->p_cred;
		p->p_cred = l->l_cred;
		mutex_exit(p->p_lock);
		kauth_cred_free(ocred);
	}

	return 0;
}
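
/*
 * Switch the process to the emulation of the new image: set up the
 * emulation root, run the old emulation's exit hook and the new one's
 * exec hook, and update p_emul/p_execsw.
 */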
static void
emulexec(struct lwp *l, struct exec_package *epp)
{
	struct proc		*p = l->l_proc;

	/* The emulation root will usually have been found when we looked
	 * for the elf interpreter (or similar), if not look now. */
	if (epp->ep_esch->es_emul->e_path != NULL &&
	    epp->ep_emul_root == NULL)
		emul_find_root(l, epp);

	/* Any old emulation root got removed by fdcloseexec */
	rw_enter(&p->p_cwdi->cwdi_lock, RW_WRITER);
	p->p_cwdi->cwdi_edir = epp->ep_emul_root;
	rw_exit(&p->p_cwdi->cwdi_lock);
	epp->ep_emul_root = NULL;
	if (epp->ep_interp != NULL)
		vrele(epp->ep_interp);

	/*
	 * Call emulation specific exec hook. This can setup per-process
	 * p->p_emuldata or do any other per-process stuff an emulation needs.
	 *
	 * If we are executing a process of a different emulation than the
	 * original forked process, call e_proc_exit() of the old emulation
	 * first, then e_proc_exec() of the new emulation. If the emulation
	 * is the same, the exec hook code should deallocate any old emulation
	 * resources held previously by this process.
	 */
	if (p->p_emul && p->p_emul->e_proc_exit
	    && p->p_emul != epp->ep_esch->es_emul)
		(*p->p_emul->e_proc_exit)(p);

	/*
	 * This is now LWP 1.
	 */
	/* XXX elsewhere */
	mutex_enter(p->p_lock);
	p->p_nlwpid = 1;
	l->l_lid = 1;
	mutex_exit(p->p_lock);

	/*
	 * Call exec hook. Emulation code may NOT store reference to anything
	 * from &pack.
	 */
	if (epp->ep_esch->es_emul->e_proc_exec)
		(*epp->ep_esch->es_emul->e_proc_exec)(p, epp);

	/* update p_emul, the old value is no longer needed */
	p->p_emul = epp->ep_esch->es_emul;

	/* ...and the same for p_execsw */
	p->p_execsw = epp->ep_esch;

#ifdef __HAVE_SYSCALL_INTERN
	(*p->p_emul->e_syscall_intern)(p);
#endif
	ktremul();
}
|
|
|
|
|
2012-02-12 03:16:15 +04:00
|
|
|
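
/*
 * Overview: execve_runproc() is the point of no return of an exec.
 * In order, it kills off sibling LWPs, replaces the vmspace, runs the
 * vmcmds that map the new image, copies out argv/env/ps_strings,
 * resets fds/signals/credentials, installs the new register state and
 * signal trampoline, and finally notifies tracers and kevent listeners.
 * Once the old vmspace is gone a failure cannot be unwound, so the
 * exec_abort path below must tear down the process instead.
 */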
static int
execve_runproc(struct lwp *l, struct execve_data * restrict data,
    bool no_local_exec_lock, bool is_spawn)
{
	struct exec_package * const epp = &data->ed_pack;
	int error = 0;
	struct proc *p;

	/*
	 * In case of a posix_spawn operation, the child doing the exec
	 * might not hold the reader lock on exec_lock, but the parent
	 * will do this instead.
	 */
	KASSERT(no_local_exec_lock || rw_lock_held(&exec_lock));
	KASSERT(!no_local_exec_lock || is_spawn);
	KASSERT(data != NULL);

	p = l->l_proc;

	/* Get rid of other LWPs. */
	if (p->p_nlwps > 1) {
		mutex_enter(p->p_lock);
		exit_lwps(l);
		mutex_exit(p->p_lock);
	}
	KDASSERT(p->p_nlwps == 1);

	/* Destroy any lwpctl info. */
	if (p->p_lwpctl != NULL)
		lwp_ctl_exit();

	/* Remove POSIX timers */
	timers_free(p, TIMERS_POSIX);

	/* Set the PaX flags. */
	pax_set_flags(epp, p);

	/*
	 * Do whatever is necessary to prepare the address space
	 * for remapping.  Note that this might replace the current
	 * vmspace with another!
	 */
	if (is_spawn)
		uvmspace_spawn(l, epp->ep_vm_minaddr,
		    epp->ep_vm_maxaddr,
		    epp->ep_flags & EXEC_TOPDOWN_VM);
	else
		uvmspace_exec(l, epp->ep_vm_minaddr,
		    epp->ep_vm_maxaddr,
		    epp->ep_flags & EXEC_TOPDOWN_VM);

	struct vmspace *vm;
	vm = p->p_vmspace;
	vm->vm_taddr = (void *)epp->ep_taddr;
	vm->vm_tsize = btoc(epp->ep_tsize);
	vm->vm_daddr = (void *)epp->ep_daddr;
	vm->vm_dsize = btoc(epp->ep_dsize);
	vm->vm_ssize = btoc(epp->ep_ssize);
	vm->vm_issize = 0;
	vm->vm_maxsaddr = (void *)epp->ep_maxsaddr;
	vm->vm_minsaddr = (void *)epp->ep_minsaddr;

	pax_aslr_init_vm(l, vm, epp);

	/* Now map address space. */
	error = execve_dovmcmds(l, data);
	if (error != 0)
		goto exec_abort;

	pathexec(p, epp->ep_resolvedname);

	char * const newstack = STACK_GROW(vm->vm_minsaddr, epp->ep_ssize);

	error = copyoutargs(data, l, newstack);
	if (error != 0)
		goto exec_abort;

	cwdexec(p);
	fd_closeexec();		/* handle close on exec */

	if (__predict_false(ktrace_on))
		fd_ktrexecfd();

	execsigs(p);		/* reset caught signals */

	mutex_enter(p->p_lock);
	l->l_ctxlink = NULL;	/* reset ucontext link */
	p->p_acflag &= ~AFORK;
	p->p_flag |= PK_EXEC;
	mutex_exit(p->p_lock);

	/*
	 * Stop profiling.
	 */
	if ((p->p_stflag & PST_PROFIL) != 0) {
		mutex_spin_enter(&p->p_stmutex);
		stopprofclock(p);
		mutex_spin_exit(&p->p_stmutex);
	}

	/*
	 * It's OK to test PL_PPWAIT unlocked here, as other LWPs have
	 * exited and exec()/exit() are the only places it will be cleared.
	 */
	if ((p->p_lflag & PL_PPWAIT) != 0) {
		mutex_enter(proc_lock);
		l->l_lwpctl = NULL; /* was on loan from blocked parent */
		p->p_lflag &= ~PL_PPWAIT;
		cv_broadcast(&p->p_pptr->p_waitcv);
		mutex_exit(proc_lock);
	}

	error = credexec(l, &data->ed_attr);
	if (error)
		goto exec_abort;

#if defined(__HAVE_RAS)
	/*
	 * Remove all RASs from the address space.
	 */
	ras_purgeall();
#endif

	doexechooks(p);

	/*
	 * Set initial SP at the top of the stack.
	 *
	 * Note that on machines where stack grows up (e.g. hppa), SP points to
	 * the end of arg/env strings.  Userland guesses the address of argc
	 * via ps_strings::ps_argvstr.
	 */

	/* Setup new registers and do misc. setup. */
	(*epp->ep_esch->es_emul->e_setregs)(l, epp, (vaddr_t)newstack);
	if (epp->ep_esch->es_setregs)
		(*epp->ep_esch->es_setregs)(l, epp, (vaddr_t)newstack);

	/* Provide a consistent LWP private setting */
	(void)lwp_setprivate(l, NULL);

	/* Discard all PCU state; need to start fresh */
	pcu_discard_all(l);

	/* map the process's signal trampoline code */
	if ((error = exec_sigcode_map(p, epp->ep_esch->es_emul)) != 0) {
		DPRINTF(("%s: map sigcode failed %d\n", __func__, error));
		goto exec_abort;
	}

	pool_put(&exec_pool, data->ed_argp);

	/* notify others that we exec'd */
	KNOTE(&p->p_klist, NOTE_EXEC);

	kmem_free(epp->ep_hdr, epp->ep_hdrlen);

	SDT_PROBE(proc, kernel, , exec__success, epp->ep_kname, 0, 0, 0, 0);
	emulexec(l, epp);

	/* Allow new references from the debugger/procfs. */
	rw_exit(&p->p_reflock);
	if (!no_local_exec_lock)
		rw_exit(&exec_lock);

	mutex_enter(proc_lock);

	if ((p->p_slflag & (PSL_TRACED|PSL_SYSCALL)) == PSL_TRACED) {
		ksiginfo_t ksi;

		KSI_INIT_EMPTY(&ksi);
		ksi.ksi_signo = SIGTRAP;
		ksi.ksi_code = TRAP_EXEC;
		ksi.ksi_lid = l->l_lid;
		kpsignal(p, &ksi, NULL);
	}

	if (p->p_sflag & PS_STOPEXEC) {
		ksiginfoq_t kq;

		KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
		p->p_pptr->p_nstopchild++;
		p->p_waited = 0;
		mutex_enter(p->p_lock);
		ksiginfo_queue_init(&kq);
		sigclearall(p, &contsigmask, &kq);
		lwp_lock(l);
		l->l_stat = LSSTOP;
		p->p_stat = SSTOP;
		p->p_nrlwps--;
		lwp_unlock(l);
		mutex_exit(p->p_lock);
		mutex_exit(proc_lock);
		lwp_lock(l);
		mi_switch(l);
		ksiginfo_queue_drain(&kq);
		KERNEL_LOCK(l->l_biglocks, l);
	} else {
		mutex_exit(proc_lock);
	}

	pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
	pathbuf_destroy(data->ed_pathbuf);
	PNBUF_PUT(data->ed_resolvedpathbuf);
#ifdef TRACE_EXEC
	DPRINTF(("%s finished\n", __func__));
#endif
	return EJUSTRETURN;

 exec_abort:
	SDT_PROBE(proc, kernel, , exec__failure, error, 0, 0, 0, 0);
	rw_exit(&p->p_reflock);
	if (!no_local_exec_lock)
		rw_exit(&exec_lock);

	pathbuf_stringcopy_put(data->ed_pathbuf, data->ed_pathstring);
	pathbuf_destroy(data->ed_pathbuf);
	PNBUF_PUT(data->ed_resolvedpathbuf);

	/*
	 * The old process doesn't exist anymore: exit gracefully.
	 * Get rid of the (new) address space we have created, if any,
	 * get rid of our namei data and vnode, and exit noting failure.
	 */
	uvm_deallocate(&vm->vm_map, VM_MIN_ADDRESS,
	    VM_MAXUSER_ADDRESS - VM_MIN_ADDRESS);

	exec_free_emul_arg(epp);
	pool_put(&exec_pool, data->ed_argp);
	kmem_free(epp->ep_hdr, epp->ep_hdrlen);
	if (epp->ep_emul_root != NULL)
		vrele(epp->ep_emul_root);
	if (epp->ep_interp != NULL)
		vrele(epp->ep_interp);

	/* Acquire the sched-state mutex (exit1() will release it). */
	if (!is_spawn) {
		mutex_enter(p->p_lock);
		exit1(l, error, SIGABRT);
	}

	return error;
}
int
execve1(struct lwp *l, const char *path, char * const *args,
    char * const *envs, execve_fetch_element_t fetch_element)
{
	struct execve_data data;
	int error;

	error = execve_loadvm(l, path, args, envs, fetch_element, &data);
	if (error)
		return error;
	error = execve_runproc(l, &data, false, false);
	return error;
}
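
/*
 * Illustrative use (a sketch, not a definition from this file): a
 * syscall wrapper in the style of sys_execve() only has to fetch its
 * arguments and forward them, e.g.
 *
 *	return execve1(l, SCARG(uap, path), SCARG(uap, argp),
 *	    SCARG(uap, envp), execve_fetch_element);
 *
 * The EJUSTRETURN result from execve_runproc() tells the syscall
 * layer not to clobber the register state that e_setregs() just built.
 */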
static size_t
fromptrsz(const struct exec_package *epp)
{
	return (epp->ep_flags & EXEC_FROM32) ? sizeof(int) : sizeof(char *);
}

static size_t
ptrsz(const struct exec_package *epp)
{
	return (epp->ep_flags & EXEC_32) ? sizeof(int) : sizeof(char *);
}
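
/*
 * Note on the two helpers above: EXEC_32 describes the image being
 * exec'd, EXEC_FROM32 the image doing the exec.  On an LP64 kernel a
 * 32-bit compat binary passes argv as an array of 4-byte entries
 * (sizeof(int)) while a native binary uses 8-byte entries
 * (sizeof(char *)); the two flags can differ when a 64-bit process
 * execs a 32-bit one or vice versa.
 */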
static size_t
calcargs(struct execve_data * restrict data, const size_t argenvstrlen)
{
	struct exec_package * const epp = &data->ed_pack;

	const size_t nargenvptrs =
	    1 +			/* long argc */
	    data->ed_argc +	/* char *argv[] */
	    1 +			/* \0 */
	    data->ed_envc +	/* char *env[] */
	    1;			/* \0 */

	return (nargenvptrs * ptrsz(epp))	/* pointers */
	    + argenvstrlen			/* strings */
	    + epp->ep_esch->es_arglen;		/* auxinfo */
}
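
/*
 * Worked example (illustrative numbers): for argc = 2 and envc = 20 in
 * a native LP64 image, nargenvptrs = 1 + 2 + 1 + 20 + 1 = 25 entries of
 * 8 bytes = 200 bytes of pointers, to which the combined length of the
 * argument/environment strings and the emulation's auxinfo reservation
 * (es_arglen) are added.
 */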
static size_t
calcstack(struct execve_data * restrict data, const size_t gaplen)
{
	struct exec_package * const epp = &data->ed_pack;

	data->ed_szsigcode = epp->ep_esch->es_emul->e_esigcode -
	    epp->ep_esch->es_emul->e_sigcode;

	data->ed_ps_strings_sz = (epp->ep_flags & EXEC_32) ?
	    sizeof(struct ps_strings32) : sizeof(struct ps_strings);

	const size_t sigcode_psstr_sz =
	    data->ed_szsigcode +	/* sigcode */
	    data->ed_ps_strings_sz +	/* ps_strings */
	    STACK_PTHREADSPACE;		/* pthread space */

	const size_t stacklen =
	    data->ed_argslen +
	    gaplen +
	    sigcode_psstr_sz;

	/* make the stack "safely" aligned */
	return STACK_LEN_ALIGN(stacklen, STACK_ALIGNBYTES);
}
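
/*
 * Rough layout sketch for a grows-down stack (exact placement is in
 * copyoutpsstrs() and copyoutargs(); a grows-up machine mirrors this):
 * the region at the top of the stack holds the pthread space
 * (STACK_PTHREADSPACE), the ps_strings structure and the sigcode
 * reservation; below that sits the optional gap (e.g. ASLR padding)
 * and then the argc/argv/env pointer block, strings and auxinfo, with
 * the initial SP placed beneath them.
 */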
static int
copyoutargs(struct execve_data * restrict data, struct lwp *l,
    char * const newstack)
{
	struct exec_package * const epp = &data->ed_pack;
	struct proc *p = l->l_proc;
	int error;

	/* remember information about the process */
	data->ed_arginfo.ps_nargvstr = data->ed_argc;
	data->ed_arginfo.ps_nenvstr = data->ed_envc;

	/*
	 * Allocate the stack address passed to the newly execve()'ed process.
	 *
	 * The new stack address will be set to the SP (stack pointer)
	 * register in setregs().
	 */
	char *newargs = STACK_ALLOC(
	    STACK_SHRINK(newstack, data->ed_argslen), data->ed_argslen);

	error = (*epp->ep_esch->es_copyargs)(l, epp,
	    &data->ed_arginfo, &newargs, data->ed_argp);

	if (error) {
		DPRINTF(("%s: copyargs failed %d\n", __func__, error));
		return error;
	}

	error = copyoutpsstrs(data, p);
	if (error != 0)
		return error;

	return 0;
}
static int
copyoutpsstrs(struct execve_data * restrict data, struct proc *p)
{
	struct exec_package * const epp = &data->ed_pack;
	struct ps_strings32 arginfo32;
	void *aip;
	int error;

	/* fill process ps_strings info */
	p->p_psstrp = (vaddr_t)STACK_ALLOC(STACK_GROW(epp->ep_minsaddr,
	    STACK_PTHREADSPACE), data->ed_ps_strings_sz);

	if (epp->ep_flags & EXEC_32) {
		aip = &arginfo32;
		arginfo32.ps_argvstr = (vaddr_t)data->ed_arginfo.ps_argvstr;
		arginfo32.ps_nargvstr = data->ed_arginfo.ps_nargvstr;
		arginfo32.ps_envstr = (vaddr_t)data->ed_arginfo.ps_envstr;
		arginfo32.ps_nenvstr = data->ed_arginfo.ps_nenvstr;
	} else
		aip = &data->ed_arginfo;

	/* copy out the process's ps_strings structure */
	if ((error = copyout(aip, (void *)p->p_psstrp,
	    data->ed_ps_strings_sz)) != 0) {
		DPRINTF(("%s: ps_strings copyout %p->%p size %zu failed\n",
		    __func__, aip, (void *)p->p_psstrp,
		    data->ed_ps_strings_sz));
		return error;
	}

	return 0;
}
static int
copyinargs(struct execve_data * restrict data, char * const *args,
    char * const *envs, execve_fetch_element_t fetch_element, char **dpp)
{
	struct exec_package * const epp = &data->ed_pack;
	char *dp;
	size_t i;
	int error;

	dp = *dpp;

	data->ed_argc = 0;

	/* copy the fake args list, if there's one, freeing it as we go */
	if (epp->ep_flags & EXEC_HASARGL) {
		struct exec_fakearg *fa = epp->ep_fa;

		while (fa->fa_arg != NULL) {
			const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
			size_t len;

			len = strlcpy(dp, fa->fa_arg, maxlen);
			/* Count NUL into len. */
			if (len < maxlen)
				len++;
			else {
				while (fa->fa_arg != NULL) {
					kmem_free(fa->fa_arg, fa->fa_len);
					fa++;
				}
				kmem_free(epp->ep_fa, epp->ep_fa_len);
				epp->ep_flags &= ~EXEC_HASARGL;
				return E2BIG;
			}
			ktrexecarg(fa->fa_arg, len - 1);
			dp += len;

			kmem_free(fa->fa_arg, fa->fa_len);
			fa++;
			data->ed_argc++;
		}
		kmem_free(epp->ep_fa, epp->ep_fa_len);
		epp->ep_flags &= ~EXEC_HASARGL;
	}

	/*
	 * Read and count argument strings from user.
	 */

	if (args == NULL) {
		DPRINTF(("%s: null args\n", __func__));
		return EINVAL;
	}
	if (epp->ep_flags & EXEC_SKIPARG)
		args = (const void *)((const char *)args + fromptrsz(epp));
	i = 0;
	error = copyinargstrs(data, args, fetch_element, &dp, &i, ktr_execarg);
	if (error != 0) {
		DPRINTF(("%s: copyin arg %d\n", __func__, error));
		return error;
	}
	data->ed_argc += i;

	/*
	 * Read and count environment strings from user.
	 */

	data->ed_envc = 0;
	/* environment need not be there */
	if (envs == NULL)
		goto done;
	i = 0;
	error = copyinargstrs(data, envs, fetch_element, &dp, &i, ktr_execenv);
	if (error != 0) {
		DPRINTF(("%s: copyin env %d\n", __func__, error));
		return error;
	}
	data->ed_envc += i;

done:
	*dpp = dp;

	return 0;
}
static int
copyinargstrs(struct execve_data * restrict data, char * const *strs,
    execve_fetch_element_t fetch_element, char **dpp, size_t *ip,
    void (*ktr)(const void *, size_t))
{
	char *dp, *sp;
	size_t i;
	int error;

	dp = *dpp;

	i = 0;
	while (1) {
		const size_t maxlen = ARG_MAX - (dp - data->ed_argp);
		size_t len;

		if ((error = (*fetch_element)(strs, i, &sp)) != 0) {
			return error;
		}
		if (!sp)
			break;
		if ((error = copyinstr(sp, dp, maxlen, &len)) != 0) {
			if (error == ENAMETOOLONG)
				error = E2BIG;
			return error;
		}
		if (__predict_false(ktrace_on))
			(*ktr)(dp, len - 1);
		dp += len;
		i++;
	}

	*dpp = dp;
	*ip = i;

	return 0;
}
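
/*
 * Note: copyinstr() reports a string that does not fit as
 * ENAMETOOLONG, but exec is required to report an oversized argument
 * list as E2BIG, hence the translation above.  The budget is
 * cumulative: maxlen shrinks by the length of every string copied in
 * so far, so the ARG_MAX limit applies to args and env together.
 */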
/*
 * Copy argv and env strings from kernel buffer (argp) to the new stack.
 * Those strings are located just after auxinfo.
 */
int
copyargs(struct lwp *l, struct exec_package *pack, struct ps_strings *arginfo,
    char **stackp, void *argp)
{
	char **cpp, *dp, *sp;
	size_t len;
	void *nullp;
	long argc, envc;
	int error;

	cpp = (char **)*stackp;
	nullp = NULL;
	argc = arginfo->ps_nargvstr;
	envc = arginfo->ps_nenvstr;

	/* argc on stack is long */
	CTASSERT(sizeof(*cpp) == sizeof(argc));

	dp = (char *)(cpp +
	    1 +			/* long argc */
	    argc +		/* char *argv[] */
	    1 +			/* \0 */
	    envc +		/* char *env[] */
	    1) +		/* \0 */
	    pack->ep_esch->es_arglen;	/* auxinfo */
	sp = argp;

	if ((error = copyout(&argc, cpp++, sizeof(argc))) != 0) {
		COPYPRINTF("", cpp - 1, sizeof(argc));
		return error;
	}

	/* XXX don't copy them out, remap them! */
	arginfo->ps_argvstr = cpp; /* remember location of argv for later */

	for (; --argc >= 0; sp += len, dp += len) {
		if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
			COPYPRINTF("", cpp - 1, sizeof(dp));
			return error;
		}
		if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
			COPYPRINTF("str", dp, (size_t)ARG_MAX);
			return error;
		}
	}

	if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
		COPYPRINTF("", cpp - 1, sizeof(nullp));
		return error;
	}

	arginfo->ps_envstr = cpp; /* remember location of envp for later */

	for (; --envc >= 0; sp += len, dp += len) {
		if ((error = copyout(&dp, cpp++, sizeof(dp))) != 0) {
			COPYPRINTF("", cpp - 1, sizeof(dp));
			return error;
		}
		if ((error = copyoutstr(sp, dp, ARG_MAX, &len)) != 0) {
			COPYPRINTF("str", dp, (size_t)ARG_MAX);
			return error;
		}
	}

	if ((error = copyout(&nullp, cpp++, sizeof(nullp))) != 0) {
		COPYPRINTF("", cpp - 1, sizeof(nullp));
		return error;
	}

	*stackp = (char *)cpp;
	return 0;
}
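
/*
 * The frame copyargs() builds, from low to high addresses:
 *
 *	argc (a long)
 *	argv[0] ... argv[argc-1], NULL
 *	env[0] ... env[envc-1], NULL
 *	auxinfo (es_arglen bytes, filled in by the emulation)
 *	the argv/env strings themselves
 *
 * The pointers written into argv[]/env[] already refer to the string
 * copies on the new user stack, not to the kernel buffer.
 */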
/*
 * Add execsw[] entries.
 */
int
exec_add(struct execsw *esp, int count)
{
	struct exec_entry *it;
	int i;

	if (count == 0) {
		return 0;
	}

	/* Check for duplicates. */
	rw_enter(&exec_lock, RW_WRITER);
	for (i = 0; i < count; i++) {
		LIST_FOREACH(it, &ex_head, ex_list) {
			/* assume unique (makecmds, probe_func, emulation) */
			if (it->ex_sw->es_makecmds == esp[i].es_makecmds &&
			    it->ex_sw->u.elf_probe_func ==
			    esp[i].u.elf_probe_func &&
			    it->ex_sw->es_emul == esp[i].es_emul) {
				rw_exit(&exec_lock);
				return EEXIST;
			}
		}
	}

	/* Allocate new entries. */
	for (i = 0; i < count; i++) {
		it = kmem_alloc(sizeof(*it), KM_SLEEP);
		it->ex_sw = &esp[i];
		LIST_INSERT_HEAD(&ex_head, it, ex_list);
	}

	/* update execsw[] */
	exec_init(0);
	rw_exit(&exec_lock);
	return 0;
}
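
/*
 * Typical use (a sketch with a hypothetical module): an emulation
 * module registers its entries from its modcmd and unregisters them
 * symmetrically, e.g.
 *
 *	case MODULE_CMD_INIT:
 *		return exec_add(example_execsw,
 *		    __arraycount(example_execsw));
 *	case MODULE_CMD_FINI:
 *		return exec_remove(example_execsw,
 *		    __arraycount(example_execsw));
 *
 * where example_execsw stands in for that module's struct execsw array.
 */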
/*
 * Remove execsw[] entry.
 */
int
exec_remove(struct execsw *esp, int count)
{
	struct exec_entry *it, *next;
	int i;
	const struct proclist_desc *pd;
	proc_t *p;

	if (count == 0) {
		return 0;
	}

	/* Abort if any are busy. */
	rw_enter(&exec_lock, RW_WRITER);
	for (i = 0; i < count; i++) {
		mutex_enter(proc_lock);
		for (pd = proclists; pd->pd_list != NULL; pd++) {
			PROCLIST_FOREACH(p, pd->pd_list) {
				if (p->p_execsw == &esp[i]) {
					mutex_exit(proc_lock);
					rw_exit(&exec_lock);
					return EBUSY;
				}
			}
		}
		mutex_exit(proc_lock);
	}

	/* None are busy, so remove them all. */
	for (i = 0; i < count; i++) {
		for (it = LIST_FIRST(&ex_head); it != NULL; it = next) {
			next = LIST_NEXT(it, ex_list);
			if (it->ex_sw == &esp[i]) {
				LIST_REMOVE(it, ex_list);
				kmem_free(it, sizeof(*it));
				break;
			}
		}
	}

	/* update execsw[] */
	exec_init(0);
	rw_exit(&exec_lock);
	return 0;
}
/*
 * Initialize exec structures.  If init_boot is true, also does necessary
 * one-time initialization (it's called from main() that way).
 * Once the system is multiuser, this should be called with exec_lock held,
 * i.e. via exec_{add|remove}().
 */
int
exec_init(int init_boot)
{
	const struct execsw **sw;
	struct exec_entry *ex;
	SLIST_HEAD(,exec_entry) first;
	SLIST_HEAD(,exec_entry) any;
	SLIST_HEAD(,exec_entry) last;
	int i, sz;

	if (init_boot) {
		/* do one-time initializations */
		vaddr_t vmin = 0, vmax;
		rw_init(&exec_lock);
		mutex_init(&sigobject_lock, MUTEX_DEFAULT, IPL_NONE);
		exec_map = uvm_km_suballoc(kernel_map, &vmin, &vmax,
		    maxexec*NCARGS, VM_MAP_PAGEABLE, false, NULL);
		pool_init(&exec_pool, NCARGS, 0, 0, PR_NOALIGN|PR_NOTOUCH,
		    "execargs", &exec_palloc, IPL_NONE);
		pool_sethardlimit(&exec_pool, maxexec, "should not happen", 0);
	} else {
		KASSERT(rw_write_held(&exec_lock));
	}

	/* Sort each entry onto the appropriate queue. */
	SLIST_INIT(&first);
	SLIST_INIT(&any);
	SLIST_INIT(&last);
	sz = 0;
	LIST_FOREACH(ex, &ex_head, ex_list) {
		switch (ex->ex_sw->es_prio) {
		case EXECSW_PRIO_FIRST:
			SLIST_INSERT_HEAD(&first, ex, ex_slist);
			break;
		case EXECSW_PRIO_ANY:
			SLIST_INSERT_HEAD(&any, ex, ex_slist);
			break;
		case EXECSW_PRIO_LAST:
			SLIST_INSERT_HEAD(&last, ex, ex_slist);
			break;
		default:
			panic("%s", __func__);
			break;
		}
		sz++;
	}

	/*
	 * Create new execsw[].  Ensure we do not try a zero-sized
	 * allocation.
	 */
	sw = kmem_alloc(sz * sizeof(struct execsw *) + 1, KM_SLEEP);
	i = 0;
	SLIST_FOREACH(ex, &first, ex_slist) {
		sw[i++] = ex->ex_sw;
	}
	SLIST_FOREACH(ex, &any, ex_slist) {
		sw[i++] = ex->ex_sw;
	}
	SLIST_FOREACH(ex, &last, ex_slist) {
		sw[i++] = ex->ex_sw;
	}

	/* Replace old execsw[] and free used memory. */
	if (execsw != NULL) {
		kmem_free(__UNCONST(execsw),
		    nexecs * sizeof(struct execsw *) + 1);
	}
	execsw = sw;
	nexecs = sz;

	/* Figure out the maximum size of an exec header. */
	exec_maxhdrsz = sizeof(int);
	for (i = 0; i < nexecs; i++) {
		if (execsw[i]->es_hdrsz > exec_maxhdrsz)
			exec_maxhdrsz = execsw[i]->es_hdrsz;
	}

	return 0;
}
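
/*
 * Ordering note: entries are probed in execsw[] order, so the
 * EXECSW_PRIO_FIRST queue is consulted before EXECSW_PRIO_ANY, which
 * is consulted before EXECSW_PRIO_LAST.  A format whose probe could
 * misfire on foreign binaries (e.g. a script or compat handler) would
 * typically register with EXECSW_PRIO_LAST so that native formats win.
 */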
static int
exec_sigcode_map(struct proc *p, const struct emul *e)
{
	vaddr_t va;
	vsize_t sz;
	int error;
	struct uvm_object *uobj;

	sz = (vaddr_t)e->e_esigcode - (vaddr_t)e->e_sigcode;

	if (e->e_sigobject == NULL || sz == 0) {
		return 0;
	}

	/*
	 * If we don't have a sigobject for this emulation, create one.
	 *
	 * sigobject is an anonymous memory object (just like SYSV shared
	 * memory) that we keep a permanent reference to and that we map
	 * in all processes that need this sigcode.  The creation is simple:
	 * we create an object, add a permanent reference to it, map it in
	 * kernel space, copy out the sigcode to it and unmap it.
	 * We map it with PROT_READ|PROT_EXEC into the process just
	 * the way sys_mmap() would map it.
	 */

	uobj = *e->e_sigobject;
	if (uobj == NULL) {
		mutex_enter(&sigobject_lock);
		if ((uobj = *e->e_sigobject) == NULL) {
			uobj = uao_create(sz, 0);
			(*uobj->pgops->pgo_reference)(uobj);
			va = vm_map_min(kernel_map);
			if ((error = uvm_map(kernel_map, &va, round_page(sz),
			    uobj, 0, 0,
			    UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RW,
			    UVM_INH_SHARE, UVM_ADV_RANDOM, 0)))) {
				printf("kernel mapping failed %d\n", error);
				(*uobj->pgops->pgo_detach)(uobj);
				mutex_exit(&sigobject_lock);
				return error;
			}
			memcpy((void *)va, e->e_sigcode, sz);
#ifdef PMAP_NEED_PROCWR
			pmap_procwr(&proc0, va, sz);
#endif
			uvm_unmap(kernel_map, va, va + round_page(sz));
			*e->e_sigobject = uobj;
		}
		mutex_exit(&sigobject_lock);
	}

	/* Just a hint to uvm_map where to put it. */
	va = e->e_vm_default_addr(p, (vaddr_t)p->p_vmspace->vm_daddr,
	    round_page(sz), p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);

#ifdef __alpha__
	/*
	 * Tru64 puts /sbin/loader at the end of user virtual memory,
	 * which causes the above calculation to put the sigcode at
	 * an invalid address.  Put it just below the text instead.
	 */
	if (va == (vaddr_t)vm_map_max(&p->p_vmspace->vm_map)) {
		va = (vaddr_t)p->p_vmspace->vm_taddr - round_page(sz);
	}
#endif

	(*uobj->pgops->pgo_reference)(uobj);
	error = uvm_map(&p->p_vmspace->vm_map, &va, round_page(sz),
	    uobj, 0, 0,
	    UVM_MAPFLAG(UVM_PROT_RX, UVM_PROT_RX, UVM_INH_SHARE,
	    UVM_ADV_RANDOM, 0));
	if (error) {
		DPRINTF(("%s, %d: map %p "
		    "uvm_map %#"PRIxVSIZE"@%#"PRIxVADDR" failed %d\n",
		    __func__, __LINE__, &p->p_vmspace->vm_map, round_page(sz),
		    va, error));
		(*uobj->pgops->pgo_detach)(uobj);
		return error;
	}
	p->p_sigctx.ps_sigcode = (void *)va;
	return 0;
}
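
/*
 * Note the double-checked locking above: *e->e_sigobject is tested
 * once without the lock and re-tested under sigobject_lock before
 * creation, so concurrent first execs of the same emulation create
 * the shared object exactly once while the common case skips the
 * mutex entirely.
 */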
/*
 * Release a refcount on spawn_exec_data and destroy memory, if this
 * was the last one.
 */
static void
spawn_exec_data_release(struct spawn_exec_data *data)
{
	if (atomic_dec_32_nv(&data->sed_refcnt) != 0)
		return;

	cv_destroy(&data->sed_cv_child_ready);
	mutex_destroy(&data->sed_mtx_child);

	if (data->sed_actions)
		posix_spawn_fa_free(data->sed_actions,
		    data->sed_actions->len);
	if (data->sed_attrs)
		kmem_free(data->sed_attrs,
		    sizeof(*data->sed_attrs));
	kmem_free(data, sizeof(*data));
}
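
/*
 * Both the parent (in the posix_spawn path) and the spawned child hold
 * a reference on the spawn_exec_data, so whichever side finishes last
 * performs the teardown above; atomic_dec_32_nv() makes that handoff
 * safe without further locking.
 */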
/*
 * A child lwp of a posix_spawn operation starts here and ends up in
 * cpu_spawn_return, dealing with all file descriptor and scheduler
 * manipulations in between.
 * The parent waits for the child, as it is not clear whether the child
 * will be able to acquire its own exec_lock.  If it can, the parent can
 * be released early and continue running in parallel.  If not (or if the
 * magic debug flag is passed in the scheduler attribute struct), the
 * child rides on the parent's exec lock until it is ready to return
 * to userland - and only then releases the parent.  This method loses
 * concurrency, but improves error reporting.
 */
static void
spawn_return(void *arg)
{
	struct spawn_exec_data *spawn_data = arg;
	struct lwp *l = curlwp;
	int error, newfd;
	int ostat;
	size_t i;
	const struct posix_spawn_file_actions_entry *fae;
	pid_t ppid;
	register_t retval;
	bool have_reflock;
	bool parent_is_waiting = true;

	/*
	 * Check if we can release the parent early.
	 * We either need to have no sed_attrs, or sed_attrs does not
	 * have POSIX_SPAWN_RETURNERROR or any of the flags that require
	 * safe access to the parent proc (passed in sed_parent).
	 * We then try to get the exec_lock, and only if that works, we can
	 * release the parent here already.
	 */
	ppid = spawn_data->sed_parent->p_pid;
	if ((!spawn_data->sed_attrs
	    || (spawn_data->sed_attrs->sa_flags
	        & (POSIX_SPAWN_RETURNERROR|POSIX_SPAWN_SETPGROUP)) == 0)
	    && rw_tryenter(&exec_lock, RW_READER)) {
		parent_is_waiting = false;
		mutex_enter(&spawn_data->sed_mtx_child);
		cv_signal(&spawn_data->sed_cv_child_ready);
		mutex_exit(&spawn_data->sed_mtx_child);
	}

	/* don't allow debugger access yet */
	rw_enter(&l->l_proc->p_reflock, RW_WRITER);
	have_reflock = true;

	error = 0;
	/* handle posix_spawn_file_actions */
	if (spawn_data->sed_actions != NULL) {
		for (i = 0; i < spawn_data->sed_actions->len; i++) {
			fae = &spawn_data->sed_actions->fae[i];
			switch (fae->fae_action) {
			case FAE_OPEN:
				if (fd_getfile(fae->fae_fildes) != NULL) {
					error = fd_close(fae->fae_fildes);
					if (error)
						break;
				}
				error = fd_open(fae->fae_path, fae->fae_oflag,
				    fae->fae_mode, &newfd);
				if (error)
					break;
				if (newfd != fae->fae_fildes) {
					error = dodup(l, newfd,
					    fae->fae_fildes, 0, &retval);
					if (fd_getfile(newfd) != NULL)
						fd_close(newfd);
				}
				break;
			case FAE_DUP2:
				error = dodup(l, fae->fae_fildes,
				    fae->fae_newfildes, 0, &retval);
				break;
			case FAE_CLOSE:
				if (fd_getfile(fae->fae_fildes) == NULL) {
					error = EBADF;
					break;
				}
				error = fd_close(fae->fae_fildes);
				break;
			}
			if (error)
				goto report_error;
		}
	}

	/* handle posix_spawnattr */
	if (spawn_data->sed_attrs != NULL) {
		struct sigaction sigact;
		sigact._sa_u._sa_handler = SIG_DFL;
		sigact.sa_flags = 0;

		/*
		 * set state to SSTOP so that this proc can be found by pid.
		 * see proc_enterpgrp, do_sched_setparam below
		 */
		mutex_enter(proc_lock);
		/*
		 * p_stat should be SACTIVE, so we need to adjust the
		 * parent's p_nstopchild here.  For safety, just make sure
		 * we're on the good side of SDEAD before we adjust.
		 */
		ostat = l->l_proc->p_stat;
		KASSERT(ostat < SSTOP);
		l->l_proc->p_stat = SSTOP;
		l->l_proc->p_waited = 0;
		l->l_proc->p_pptr->p_nstopchild++;
		mutex_exit(proc_lock);

		/* Set process group */
		if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETPGROUP) {
			pid_t mypid = l->l_proc->p_pid,
			    pgrp = spawn_data->sed_attrs->sa_pgroup;

			if (pgrp == 0)
				pgrp = mypid;

			error = proc_enterpgrp(spawn_data->sed_parent,
			    mypid, pgrp, false);
			if (error)
				goto report_error_stopped;
		}

		/* Set scheduler policy */
		if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSCHEDULER)
			error = do_sched_setparam(l->l_proc->p_pid, 0,
			    spawn_data->sed_attrs->sa_schedpolicy,
			    &spawn_data->sed_attrs->sa_schedparam);
		else if (spawn_data->sed_attrs->sa_flags
		    & POSIX_SPAWN_SETSCHEDPARAM) {
			error = do_sched_setparam(ppid, 0,
			    SCHED_NONE, &spawn_data->sed_attrs->sa_schedparam);
		}
		if (error)
			goto report_error_stopped;

		/* Reset user ID's */
		if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_RESETIDS) {
			error = do_setresgid(l, -1,
			    kauth_cred_getgid(l->l_cred), -1,
			    ID_E_EQ_R | ID_E_EQ_S);
			if (error)
goto report_error_stopped;
|
2012-02-12 03:16:15 +04:00
|
|
|
error = do_setresuid(l, -1,
|
|
|
|
kauth_cred_getuid(l->l_cred), -1,
|
|
|
|
ID_E_EQ_R | ID_E_EQ_S);
|
|
|
|
if (error)
|
In spawn_return() we temporarily move the process state to SSTOP, but
without updating its p_waited value or its parent's p_nstopchild
counter. Later, we restore the original state, again without any
adjustment of the related values. This leaves a relatively short
window when the values are inconsistent and could interfere with the
proper operation of sys_wait() for the parent (if it manages to be
scheduled; it's not totally clear what, if anything, prevents
scheduling/execution of the parent).
If during this window, any of the checks being made result in an
error, we call exit1() which will eventually migrate the process's
state to SDEAD (with an intermediate transition to SDYING). At
this point the other variables get updated, and we finally restore
a consistent state.
This change updates the p_waited and parent's p_nstopchild at each
step to eliminate any windows during which the values could lead to
incorrect decisions.
Fixes PR kern/50330
Pullups will be requested for NetBSD-7, -6, -6-0, and -6-1
2015-10-13 03:29:34 +03:00
|
|
|
goto report_error_stopped;
|
2012-02-12 03:16:15 +04:00
|
|
|
}

		/* Set signal masks/defaults */
		if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGMASK) {
			mutex_enter(l->l_proc->p_lock);
			error = sigprocmask1(l, SIG_SETMASK,
			    &spawn_data->sed_attrs->sa_sigmask, NULL);
			mutex_exit(l->l_proc->p_lock);
			if (error)
				goto report_error_stopped;
		}

		if (spawn_data->sed_attrs->sa_flags & POSIX_SPAWN_SETSIGDEF) {
			/*
			 * The following sigaction call is using a sigaction
			 * version 0 trampoline which is in the compatibility
			 * code only. This is not a problem because for SIG_DFL
			 * and SIG_IGN, the trampolines are now ignored. If they
			 * were not, this would be a problem because we are
			 * holding the exec_lock, and the compat code needs
			 * to do the same in order to replace the trampoline
			 * code of the process.
			 */
			for (i = 1; i <= NSIG; i++) {
				if (sigismember(
				    &spawn_data->sed_attrs->sa_sigdefault, i))
					sigaction1(l, i, &sigact, NULL, NULL,
					    0);
			}
		}
		mutex_enter(proc_lock);
		l->l_proc->p_stat = ostat;
		l->l_proc->p_pptr->p_nstopchild--;
		mutex_exit(proc_lock);
	}

	/* now do the real exec */
	error = execve_runproc(l, &spawn_data->sed_exec, parent_is_waiting,
	    true);
	have_reflock = false;
	if (error == EJUSTRETURN)
		error = 0;
	else if (error)
		goto report_error;

	if (parent_is_waiting) {
		mutex_enter(&spawn_data->sed_mtx_child);
		cv_signal(&spawn_data->sed_cv_child_ready);
		mutex_exit(&spawn_data->sed_mtx_child);
	}

	/* release our refcount on the data */
	spawn_exec_data_release(spawn_data);

	/* and finally: leave to userland for the first time */
	cpu_spawn_return(l);

	/* NOTREACHED */
	return;
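
	/*
	 * Error paths that leave the SSTOP window above must restore the
	 * parent's p_nstopchild along with p_stat, or the parent's
	 * sys_wait() could act on inconsistent values (PR kern/50330).
	 */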
 report_error_stopped:
	mutex_enter(proc_lock);
	l->l_proc->p_stat = ostat;
	l->l_proc->p_pptr->p_nstopchild--;
	mutex_exit(proc_lock);
 report_error:
	if (have_reflock) {
		/*
		 * We have not passed through execve_runproc(),
		 * which would have released the p_reflock and also
		 * taken ownership of the sed_exec part of spawn_data,
		 * so release/free both here.
		 */
		rw_exit(&l->l_proc->p_reflock);
		execve_free_data(&spawn_data->sed_exec);
	}

	if (parent_is_waiting) {
		/* pass error to parent */
		mutex_enter(&spawn_data->sed_mtx_child);
		spawn_data->sed_error = error;
		cv_signal(&spawn_data->sed_cv_child_ready);
		mutex_exit(&spawn_data->sed_mtx_child);
	} else {
		rw_exit(&exec_lock);
	}

	/* release our refcount on the data */
	spawn_exec_data_release(spawn_data);

	/* done, exit */
	mutex_enter(l->l_proc->p_lock);
	/*
	 * Posix explicitly asks for an exit code of 127 if we report
	 * errors from the child process - so, unfortunately, there
	 * is no way to report a more exact error code.
	 * A NetBSD specific workaround is POSIX_SPAWN_RETURNERROR as
	 * flag bit in the attrp argument to posix_spawn(2), see above.
	 */
	exit1(l, 127, 0);
}
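
/*
 * For illustration only (never compiled): a minimal userland sketch of
 * the 127 convention documented above. spawn_and_report() is a
 * hypothetical helper; under plain POSIX semantics, a child that fails
 * its exec after posix_spawn(3) succeeded can only be observed by the
 * parent as exit status 127.
 */
#if 0
#include <sys/wait.h>
#include <spawn.h>
#include <stdio.h>

extern char **environ;

static int
spawn_and_report(const char *path, char *const argv[])
{
	pid_t pid;
	int status;

	/* posix_spawn(3) may succeed even though the exec later fails */
	if (posix_spawn(&pid, path, NULL, NULL, argv, environ) != 0)
		return -1;
	if (waitpid(pid, &status, 0) == -1)
		return -1;
	/* the POSIX-mandated marker for "exec failed in the child" */
	if (WIFEXITED(status) && WEXITSTATUS(status) == 127)
		fprintf(stderr, "%s: exec failed in child\n", path);
	return status;
}
#endif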

void
posix_spawn_fa_free(struct posix_spawn_file_actions *fa, size_t len)
{

	for (size_t i = 0; i < len; i++) {
		struct posix_spawn_file_actions_entry *fae = &fa->fae[i];
		if (fae->fae_action != FAE_OPEN)
			continue;
		kmem_strfree(fae->fae_path);
	}
	if (fa->len > 0)
		kmem_free(fa->fae, sizeof(*fa->fae) * fa->len);
	kmem_free(fa, sizeof(*fa));
}

static int
posix_spawn_fa_alloc(struct posix_spawn_file_actions **fap,
    const struct posix_spawn_file_actions *ufa, rlim_t lim)
{
	struct posix_spawn_file_actions *fa;
	struct posix_spawn_file_actions_entry *fae;
	char *pbuf = NULL;
	int error;
	size_t i = 0;

	fa = kmem_alloc(sizeof(*fa), KM_SLEEP);
	error = copyin(ufa, fa, sizeof(*fa));
	if (error || fa->len == 0) {
		kmem_free(fa, sizeof(*fa));
		return error;	/* 0 if not an error, and len == 0 */
	}

	if (fa->len > lim) {
		kmem_free(fa, sizeof(*fa));
		return EINVAL;
	}

	fa->size = fa->len;
	size_t fal = fa->len * sizeof(*fae);
	fae = fa->fae;
	fa->fae = kmem_alloc(fal, KM_SLEEP);
	error = copyin(fae, fa->fae, fal);
	if (error)
		goto out;

	pbuf = PNBUF_GET();
	for (; i < fa->len; i++) {
		fae = &fa->fae[i];
		if (fae->fae_action != FAE_OPEN)
			continue;
		error = copyinstr(fae->fae_path, pbuf, MAXPATHLEN, &fal);
		if (error)
			goto out;
		fae->fae_path = kmem_alloc(fal, KM_SLEEP);
		memcpy(fae->fae_path, pbuf, fal);
	}
	PNBUF_PUT(pbuf);

	*fap = fa;
	return 0;
out:
	if (pbuf)
		PNBUF_PUT(pbuf);
	posix_spawn_fa_free(fa, i);
	return error;
}
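
/*
 * For illustration only (never compiled): a userland sketch of what the
 * structure copied in above describes. Each addopen request becomes an
 * FAE_OPEN entry whose path string posix_spawn_fa_alloc() pulls in with
 * copyinstr(). spawn_with_output() is a hypothetical helper.
 */
#if 0
#include <fcntl.h>
#include <spawn.h>
#include <unistd.h>

extern char **environ;

static int
spawn_with_output(const char *path, char *const argv[], const char *logfile)
{
	posix_spawn_file_actions_t fa;
	pid_t pid;
	int error;

	posix_spawn_file_actions_init(&fa);
	/* becomes an FAE_OPEN entry with a copied-in path */
	posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO, logfile,
	    O_WRONLY | O_CREAT | O_TRUNC, 0644);
	error = posix_spawn(&pid, path, &fa, NULL, argv, environ);
	posix_spawn_file_actions_destroy(&fa);
	return error;
}
#endif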

int
check_posix_spawn(struct lwp *l1)
{
	int error, tnprocs, count;
	uid_t uid;
	struct proc *p1;

	p1 = l1->l_proc;
	uid = kauth_cred_getuid(l1->l_cred);
	tnprocs = atomic_inc_uint_nv(&nprocs);

	/*
	 * Although process entries are dynamically created, we still keep
	 * a global limit on the maximum number we will create.
	 */
	if (__predict_false(tnprocs >= maxproc))
		error = -1;
	else
		error = kauth_authorize_process(l1->l_cred,
		    KAUTH_PROCESS_FORK, p1, KAUTH_ARG(tnprocs), NULL, NULL);

	if (error) {
		atomic_dec_uint(&nprocs);
		return EAGAIN;
	}

	/*
	 * Enforce limits.
	 */
	count = chgproccnt(uid, 1);
	if (kauth_authorize_process(l1->l_cred, KAUTH_PROCESS_RLIMIT,
	    p1, KAUTH_ARG(KAUTH_REQ_PROCESS_RLIMIT_BYPASS),
	    &p1->p_rlimit[RLIMIT_NPROC], KAUTH_ARG(RLIMIT_NPROC)) != 0 &&
	    __predict_false(count > p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
		(void)chgproccnt(uid, -1);
		atomic_dec_uint(&nprocs);
		return EAGAIN;
	}

	return 0;
}
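
/*
 * Accounting contract: on success the caller now owns one reference in
 * nprocs and one in the per-uid proc count; sys_posix_spawn() undoes
 * both in its error_exit path when no child was created, matching the
 * rollback done above for the in-function failure cases.
 */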

int
do_posix_spawn(struct lwp *l1, pid_t *pid_res, bool *child_ok, const char *path,
    struct posix_spawn_file_actions *fa,
    struct posix_spawnattr *sa,
    char *const *argv, char *const *envp,
    execve_fetch_element_t fetch)
{

	struct proc *p1, *p2;
	struct lwp *l2;
	int error;
	struct spawn_exec_data *spawn_data;
	vaddr_t uaddr;
	pid_t pid;
	bool have_exec_lock = false;

	p1 = l1->l_proc;

	/* Allocate and init spawn_data */
	spawn_data = kmem_zalloc(sizeof(*spawn_data), KM_SLEEP);
	spawn_data->sed_refcnt = 1; /* only parent so far */
	cv_init(&spawn_data->sed_cv_child_ready, "pspawn");
	mutex_init(&spawn_data->sed_mtx_child, MUTEX_DEFAULT, IPL_NONE);
	mutex_enter(&spawn_data->sed_mtx_child);

	/*
	 * Do the first part of the exec now, collect state
	 * in spawn_data.
	 */
	error = execve_loadvm(l1, path, argv,
	    envp, fetch, &spawn_data->sed_exec);
	if (error == EJUSTRETURN)
		error = 0;
	else if (error)
		goto error_exit;

	have_exec_lock = true;

	/*
	 * Allocate virtual address space for the U-area now, while it
	 * is still easy to abort the fork operation if we're out of
	 * kernel virtual address space.
	 */
	uaddr = uvm_uarea_alloc();
	if (__predict_false(uaddr == 0)) {
		error = ENOMEM;
		goto error_exit;
	}

	/*
	 * Allocate new proc. Borrow proc0 vmspace for it, we will
	 * replace it with its own before returning to userland
	 * in the child.
	 * This is a point of no return, we will have to go through
	 * the child proc to properly clean it up past this point.
	 */
	p2 = proc_alloc();
	pid = p2->p_pid;

	/*
	 * Make a proc table entry for the new process.
	 * Start by zeroing the section of proc that is zero-initialized,
	 * then copy the section that is copied directly from the parent.
	 */
	memset(&p2->p_startzero, 0,
	    (unsigned) ((char *)&p2->p_endzero - (char *)&p2->p_startzero));
	memcpy(&p2->p_startcopy, &p1->p_startcopy,
	    (unsigned) ((char *)&p2->p_endcopy - (char *)&p2->p_startcopy));
	p2->p_vmspace = proc0.p_vmspace;

	TAILQ_INIT(&p2->p_sigpend.sp_info);

	LIST_INIT(&p2->p_lwps);
	LIST_INIT(&p2->p_sigwaiters);

	/*
	 * Duplicate sub-structures as needed.
	 * Increase reference counts on shared objects.
	 * Inherit flags we want to keep. The flags related to SIGCHLD
	 * handling are important in order to keep a consistent behaviour
	 * for the child after the fork. If we are a 32-bit process, the
	 * child will be too.
	 */
	p2->p_flag =
	    p1->p_flag & (PK_SUGID | PK_NOCLDWAIT | PK_CLDSIGIGN | PK_32);
	p2->p_emul = p1->p_emul;
	p2->p_execsw = p1->p_execsw;

	mutex_init(&p2->p_stmutex, MUTEX_DEFAULT, IPL_HIGH);
	mutex_init(&p2->p_auxlock, MUTEX_DEFAULT, IPL_NONE);
	rw_init(&p2->p_reflock);
	cv_init(&p2->p_waitcv, "wait");
	cv_init(&p2->p_lwpcv, "lwpwait");

	p2->p_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);

	kauth_proc_fork(p1, p2);

	p2->p_raslist = NULL;
	p2->p_fd = fd_copy();

	/* XXX racy */
	p2->p_mqueue_cnt = p1->p_mqueue_cnt;

	p2->p_cwdi = cwdinit();

	/*
	 * Note: p_limit (rlimit stuff) is copy-on-write, so normally
	 * we just need increase pl_refcnt.
	 */
	if (!p1->p_limit->pl_writeable) {
		lim_addref(p1->p_limit);
		p2->p_limit = p1->p_limit;
	} else {
		p2->p_limit = lim_copy(p1->p_limit);
	}

	p2->p_lflag = 0;
	p2->p_sflag = 0;
	p2->p_slflag = 0;
	p2->p_pptr = p1;
	p2->p_ppid = p1->p_pid;
	LIST_INIT(&p2->p_children);

	p2->p_aio = NULL;

#ifdef KTRACE
	/*
	 * Copy traceflag and tracefile if enabled.
	 * If not inherited, these were zeroed above.
	 */
	if (p1->p_traceflag & KTRFAC_INHERIT) {
		mutex_enter(&ktrace_lock);
		p2->p_traceflag = p1->p_traceflag;
		if ((p2->p_tracep = p1->p_tracep) != NULL)
			ktradref(p2);
		mutex_exit(&ktrace_lock);
	}
#endif

	/*
	 * Create signal actions for the child process.
	 */
	p2->p_sigacts = sigactsinit(p1, 0);
	mutex_enter(p1->p_lock);
	p2->p_sflag |=
	    (p1->p_sflag & (PS_STOPFORK | PS_STOPEXEC | PS_NOCLDSTOP));
	sched_proc_fork(p1, p2);
	mutex_exit(p1->p_lock);

	p2->p_stflag = p1->p_stflag;

	/*
	 * p_stats.
	 * Copy parts of p_stats, and zero out the rest.
	 */
	p2->p_stats = pstatscopy(p1->p_stats);

	/* copy over machdep flags to the new proc */
	cpu_proc_fork(p1, p2);

	/*
	 * Prepare remaining parts of spawn data
	 */
	spawn_data->sed_actions = fa;
	spawn_data->sed_attrs = sa;

	spawn_data->sed_parent = p1;

	/* create LWP */
	lwp_create(l1, p2, uaddr, 0, NULL, 0, spawn_return, spawn_data,
	    &l2, l1->l_class, &l1->l_sigmask, &l1->l_sigstk);
	l2->l_ctxlink = NULL;	/* reset ucontext link */

	/*
	 * Copy the credential so other references don't see our changes.
	 * Test to see if this is necessary first, since in the common case
	 * we won't need a private reference.
	 */
	if (kauth_cred_geteuid(l2->l_cred) != kauth_cred_getsvuid(l2->l_cred) ||
	    kauth_cred_getegid(l2->l_cred) != kauth_cred_getsvgid(l2->l_cred)) {
		l2->l_cred = kauth_cred_copy(l2->l_cred);
		kauth_cred_setsvuid(l2->l_cred, kauth_cred_geteuid(l2->l_cred));
		kauth_cred_setsvgid(l2->l_cred, kauth_cred_getegid(l2->l_cred));
	}

	/* Update the master credentials. */
	if (l2->l_cred != p2->p_cred) {
		kauth_cred_t ocred;

		kauth_cred_hold(l2->l_cred);
		mutex_enter(p2->p_lock);
		ocred = p2->p_cred;
		p2->p_cred = l2->l_cred;
		mutex_exit(p2->p_lock);
		kauth_cred_free(ocred);
	}

	*child_ok = true;
	spawn_data->sed_refcnt = 2;	/* child gets it as well */
#if 0
	l2->l_nopreempt = 1; /* start it non-preemptable */
#endif

	/*
	 * It's now safe for the scheduler and other processes to see the
	 * child process.
	 */
	mutex_enter(proc_lock);

	if (p1->p_session->s_ttyvp != NULL && p1->p_lflag & PL_CONTROLT)
		p2->p_lflag |= PL_CONTROLT;

	LIST_INSERT_HEAD(&p1->p_children, p2, p_sibling);
	p2->p_exitsig = SIGCHLD;	/* signal for parent on exit */

	LIST_INSERT_AFTER(p1, p2, p_pglist);
	LIST_INSERT_HEAD(&allproc, p2, p_list);

	p2->p_trace_enabled = trace_is_enabled(p2);
#ifdef __HAVE_SYSCALL_INTERN
	(*p2->p_emul->e_syscall_intern)(p2);
#endif

	/*
	 * Make child runnable, set start time, and add to run queue except
	 * if the parent requested the child to start in SSTOP state.
	 */
	mutex_enter(p2->p_lock);

	getmicrotime(&p2->p_stats->p_start);

	lwp_lock(l2);
	KASSERT(p2->p_nrlwps == 1);
	p2->p_nrlwps = 1;
	p2->p_stat = SACTIVE;
	l2->l_stat = LSRUN;
	sched_enqueue(l2, false);
	lwp_unlock(l2);

	mutex_exit(p2->p_lock);
	mutex_exit(proc_lock);

	cv_wait(&spawn_data->sed_cv_child_ready, &spawn_data->sed_mtx_child);
	error = spawn_data->sed_error;
	mutex_exit(&spawn_data->sed_mtx_child);
	spawn_exec_data_release(spawn_data);

	rw_exit(&p1->p_reflock);
	rw_exit(&exec_lock);
	have_exec_lock = false;

	*pid_res = pid;
	return error;

 error_exit:
	if (have_exec_lock) {
		execve_free_data(&spawn_data->sed_exec);
		rw_exit(&p1->p_reflock);
		rw_exit(&exec_lock);
	}
	mutex_exit(&spawn_data->sed_mtx_child);
	spawn_exec_data_release(spawn_data);

	return error;
}
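
/*
 * Synchronization summary for the above: the parent holds sed_mtx_child
 * across child creation and sleeps on sed_cv_child_ready until
 * spawn_return() signals success or posts sed_error; spawn_data is
 * reference counted (parent and child each hold one), so whichever side
 * finishes last frees it.
 */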

int
sys_posix_spawn(struct lwp *l1, const struct sys_posix_spawn_args *uap,
    register_t *retval)
{
	/* {
		syscallarg(pid_t *) pid;
		syscallarg(const char *) path;
		syscallarg(const struct posix_spawn_file_actions *) file_actions;
		syscallarg(const struct posix_spawnattr *) attrp;
		syscallarg(char *const *) argv;
		syscallarg(char *const *) envp;
	} */

	int error;
	struct posix_spawn_file_actions *fa = NULL;
	struct posix_spawnattr *sa = NULL;
	pid_t pid;
	bool child_ok = false;
	rlim_t max_fileactions;
	proc_t *p = l1->l_proc;

	error = check_posix_spawn(l1);
	if (error) {
		*retval = error;
		return 0;
	}

	/* copy in file_actions struct */
	if (SCARG(uap, file_actions) != NULL) {
		max_fileactions = 2 * min(p->p_rlimit[RLIMIT_NOFILE].rlim_cur,
		    maxfiles);
		error = posix_spawn_fa_alloc(&fa, SCARG(uap, file_actions),
		    max_fileactions);
		if (error)
			goto error_exit;
	}

	/* copyin posix_spawnattr struct */
	if (SCARG(uap, attrp) != NULL) {
		sa = kmem_alloc(sizeof(*sa), KM_SLEEP);
		error = copyin(SCARG(uap, attrp), sa, sizeof(*sa));
		if (error)
			goto error_exit;
	}

	/*
	 * Do the spawn
	 */
	error = do_posix_spawn(l1, &pid, &child_ok, SCARG(uap, path), fa, sa,
	    SCARG(uap, argv), SCARG(uap, envp), execve_fetch_element);
	if (error)
		goto error_exit;

	if (error == 0 && SCARG(uap, pid) != NULL)
		error = copyout(&pid, SCARG(uap, pid), sizeof(pid));

	*retval = error;
	return 0;

 error_exit:
	if (!child_ok) {
		(void)chgproccnt(kauth_cred_getuid(l1->l_cred), -1);
		atomic_dec_uint(&nprocs);

		if (sa)
			kmem_free(sa, sizeof(*sa));
		if (fa)
			posix_spawn_fa_free(fa, fa->len);
	}

	*retval = error;
	return 0;
}
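
/*
 * For illustration only (never compiled): a userland sketch of the attrp
 * path above. POSIX_SPAWN_SETSIGDEF asks spawn_return() to reset the
 * listed signals to SIG_DFL before the exec.
 * spawn_with_default_sigint() is a hypothetical helper.
 */
#if 0
#include <signal.h>
#include <spawn.h>

extern char **environ;

static int
spawn_with_default_sigint(const char *path, char *const argv[])
{
	posix_spawnattr_t sa;
	sigset_t defs;
	pid_t pid;
	int error;

	posix_spawnattr_init(&sa);
	sigemptyset(&defs);
	sigaddset(&defs, SIGINT);
	/* becomes the sa_sigdefault set walked by spawn_return() */
	posix_spawnattr_setsigdefault(&sa, &defs);
	posix_spawnattr_setflags(&sa, POSIX_SPAWN_SETSIGDEF);
	error = posix_spawn(&pid, path, NULL, &sa, argv, environ);
	posix_spawnattr_destroy(&sa);
	return error;
}
#endif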

void
exec_free_emul_arg(struct exec_package *epp)
{
	if (epp->ep_emul_arg_free != NULL) {
		KASSERT(epp->ep_emul_arg != NULL);
		(*epp->ep_emul_arg_free)(epp->ep_emul_arg);
		epp->ep_emul_arg_free = NULL;
		epp->ep_emul_arg = NULL;
	} else {
		KASSERT(epp->ep_emul_arg == NULL);
	}
}

#ifdef DEBUG_EXEC
static void
dump_vmcmds(const struct exec_package * const epp, size_t x, int error)
{
	struct exec_vmcmd *vp = &epp->ep_vmcmds.evs_cmds[0];
	size_t j;

	if (error == 0)
		DPRINTF(("vmcmds %u\n", epp->ep_vmcmds.evs_used));
	else
		DPRINTF(("vmcmds %zu/%u, error %d\n", x,
		    epp->ep_vmcmds.evs_used, error));

	for (j = 0; j < epp->ep_vmcmds.evs_used; j++) {
		DPRINTF(("vmcmd[%zu] = vmcmd_map_%s %#"
		    PRIxVADDR"/%#"PRIxVSIZE" fd@%#"
		    PRIxVSIZE" prot=0%o flags=%d\n", j,
		    vp[j].ev_proc == vmcmd_map_pagedvn ?
		    "pagedvn" :
		    vp[j].ev_proc == vmcmd_map_readvn ?
		    "readvn" :
		    vp[j].ev_proc == vmcmd_map_zero ?
		    "zero" : "*unknown*",
		    vp[j].ev_addr, vp[j].ev_len,
		    vp[j].ev_offset, vp[j].ev_prot,
		    vp[j].ev_flags));
		if (error != 0 && j == x)
			DPRINTF(("        ^--- failed\n"));
	}
}
#endif