qemu/linux-user/main.c

893 lines
25 KiB
C
Raw Normal View History

/*
* qemu user main
*
* Copyright (c) 2003-2008 Fabrice Bellard
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/units.h"
#include "qemu/accel.h"
#include "sysemu/tcg.h"
#include "qemu-version.h"
#include <sys/syscall.h>
#include <sys/resource.h>
#include <sys/shm.h>
linux-user: manage binfmt-misc preserve-arg[0] flag Add --preserve-argv0 in qemu-binfmt-conf.sh to configure the preserve-argv0 flag. This patch allows to use new flag in AT_FLAGS to detect if preserve-argv0 is configured for this interpreter: argv[0] (the full pathname provided by binfmt-misc) is removed and replaced by argv[1] (the original argv[0] provided by binfmt-misc when 'P'/preserve-arg[0] is set) For instance with this patch and kernel support for AT_FLAGS: $ sudo chroot m68k-chroot sh -c 'echo $0' sh without this patch: $ sudo chroot m68k-chroot sh -c 'echo $0' /usr/bin/sh The new flag is available in kernel (v5.12) since: 2347961b11d4 ("binfmt_misc: pass binfmt_misc flags to the interpreter") This can be tested with something like: # cp ..../qemu-ppc /chroot/powerpc/jessie # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 yes # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: POC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' sh # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 no # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: OC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' /bin/sh Signed-off-by: Laurent Vivier <laurent@vivier.eu> Message-Id: <20210222105004.1642234-1-laurent@vivier.eu>
2021-02-22 13:50:04 +03:00
#include <linux/binfmts.h>
#include "qapi/error.h"
#include "qemu.h"
#include "qemu/path.h"
#include "qemu/queue.h"
#include "qemu/config-file.h"
#include "qemu/cutils.h"
#include "qemu/error-report.h"
#include "qemu/help_option.h"
#include "qemu/module.h"
#include "qemu/plugin.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "qemu/timer.h"
#include "qemu/envlist.h"
#include "qemu/guest-random.h"
#include "elf.h"
#include "trace/control.h"
#include "target_elf.h"
#include "cpu_loop-common.h"
#include "crypto/init.h"
linux-user: manage binfmt-misc preserve-arg[0] flag Add --preserve-argv0 in qemu-binfmt-conf.sh to configure the preserve-argv0 flag. This patch allows to use new flag in AT_FLAGS to detect if preserve-argv0 is configured for this interpreter: argv[0] (the full pathname provided by binfmt-misc) is removed and replaced by argv[1] (the original argv[0] provided by binfmt-misc when 'P'/preserve-arg[0] is set) For instance with this patch and kernel support for AT_FLAGS: $ sudo chroot m68k-chroot sh -c 'echo $0' sh without this patch: $ sudo chroot m68k-chroot sh -c 'echo $0' /usr/bin/sh The new flag is available in kernel (v5.12) since: 2347961b11d4 ("binfmt_misc: pass binfmt_misc flags to the interpreter") This can be tested with something like: # cp ..../qemu-ppc /chroot/powerpc/jessie # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 yes # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: POC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' sh # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 no # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: OC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' /bin/sh Signed-off-by: Laurent Vivier <laurent@vivier.eu> Message-Id: <20210222105004.1642234-1-laurent@vivier.eu>
2021-02-22 13:50:04 +03:00
#ifndef AT_FLAGS_PRESERVE_ARGV0
#define AT_FLAGS_PRESERVE_ARGV0_BIT 0
#define AT_FLAGS_PRESERVE_ARGV0 (1 << AT_FLAGS_PRESERVE_ARGV0_BIT)
#endif
char *exec_path;
int singlestep;
static const char *argv0;
static const char *gdbstub;
static envlist_t *envlist;
static const char *cpu_model;
static const char *cpu_type;
static const char *seed_optarg;
unsigned long mmap_min_addr;
uintptr_t guest_base;
bool have_guest_base;
/*
* Used to implement backwards-compatibility for the `-strace`, and
* QEMU_STRACE options. Without this, the QEMU_LOG can be overwritten by
* -strace, or vice versa.
*/
static bool enable_strace;
/*
* The last log mask given by the user in an environment variable or argument.
* Used to support command line arguments overriding environment variables.
*/
static int last_log_mask;
/*
* When running 32-on-64 we should make sure we can fit all of the possible
* guest address space into a contiguous chunk of virtual host memory.
*
* This way we will never overlap with our own libraries or binaries or stack
* or anything else that QEMU maps.
*
* Many cpus reserve the high bit (or more than one for some 64-bit cpus)
* of the address for the kernel. Some cpus rely on this and user space
* uses the high bit(s) for pointer tagging and the like. For them, we
* must preserve the expected address space.
*/
#ifndef MAX_RESERVED_VA
# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS
# if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \
(TARGET_LONG_BITS == 32 || defined(TARGET_ABI32))
/* There are a number of places where we assign reserved_va to a variable
of type abi_ulong and expect it to fit. Avoid the last page. */
# define MAX_RESERVED_VA(CPU) (0xfffffffful & TARGET_PAGE_MASK)
# else
# define MAX_RESERVED_VA(CPU) (1ul << TARGET_VIRT_ADDR_SPACE_BITS)
# endif
# else
# define MAX_RESERVED_VA(CPU) 0
# endif
#endif
unsigned long reserved_va;
static void usage(int exitcode);
static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
const char *qemu_uname_release;
/* XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
we allocate a bigger stack. Need a better solution, for example
by remapping the process stack directly at the right place */
unsigned long guest_stack_size = 8 * 1024 * 1024UL;
#if defined(TARGET_I386)
int cpu_get_pic_interrupt(CPUX86State *env)
{
return -1;
}
#endif
/***********************************************************/
/* Helper routines for implementing atomic operations. */
/* Make sure everything is in a consistent state for calling fork(). */
void fork_start(void)
{
start_exclusive();
mmap_fork_start();
cpu_list_lock();
}
void fork_end(int child)
{
mmap_fork_end(child);
if (child) {
CPUState *cpu, *next_cpu;
/* Child processes created by fork() only have a single thread.
Discard information about the parent threads. */
CPU_FOREACH_SAFE(cpu, next_cpu) {
if (cpu != thread_cpu) {
QTAILQ_REMOVE_RCU(&cpus, cpu, node);
}
}
qemu_init_cpu_list();
gdbserver_fork(thread_cpu);
/* qemu_init_cpu_list() takes care of reinitializing the
* exclusive state, so we don't need to end_exclusive() here.
*/
} else {
cpu_list_unlock();
end_exclusive();
}
}
__thread CPUState *thread_cpu;
bool qemu_cpu_is_self(CPUState *cpu)
{
return thread_cpu == cpu;
}
void qemu_cpu_kick(CPUState *cpu)
{
cpu_exit(cpu);
}
void task_settid(TaskState *ts)
{
if (ts->ts_tid == 0) {
ts->ts_tid = (pid_t)syscall(SYS_gettid);
}
}
void stop_all_tasks(void)
{
/*
* We trust that when using NPTL, start_exclusive()
* handles thread stopping correctly.
*/
start_exclusive();
}
/* Assumes contents are already zeroed. */
void init_task_state(TaskState *ts)
{
ts->used = 1;
ts->sigaltstack_used = (struct target_sigaltstack) {
.ss_sp = 0,
.ss_size = 0,
.ss_flags = TARGET_SS_DISABLE,
};
}
CPUArchState *cpu_copy(CPUArchState *env)
{
CPUState *cpu = env_cpu(env);
CPUState *new_cpu = cpu_create(cpu_type);
CPUArchState *new_env = new_cpu->env_ptr;
CPUBreakpoint *bp;
CPUWatchpoint *wp;
/* Reset non arch specific state */
cpu_reset(new_cpu);
new_cpu->tcg_cflags = cpu->tcg_cflags;
memcpy(new_env, env, sizeof(CPUArchState));
/* Clone all break/watchpoints.
Note: Once we support ptrace with hw-debug register access, make sure
BP_CPU break/watchpoints are handled correctly on clone. */
QTAILQ_INIT(&new_cpu->breakpoints);
QTAILQ_INIT(&new_cpu->watchpoints);
QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL);
}
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL);
}
return new_env;
}
static void handle_arg_help(const char *arg)
{
usage(EXIT_SUCCESS);
}
static void handle_arg_log(const char *arg)
{
last_log_mask = qemu_str_to_log_mask(arg);
if (!last_log_mask) {
qemu_print_log_usage(stdout);
exit(EXIT_FAILURE);
}
}
static void handle_arg_dfilter(const char *arg)
{
qemu_set_dfilter_ranges(arg, &error_fatal);
}
static void handle_arg_log_filename(const char *arg)
{
qemu_set_log_filename(arg, &error_fatal);
}
static void handle_arg_set_env(const char *arg)
{
char *r, *p, *token;
r = p = strdup(arg);
while ((token = strsep(&p, ",")) != NULL) {
if (envlist_setenv(envlist, token) != 0) {
usage(EXIT_FAILURE);
}
}
free(r);
}
static void handle_arg_unset_env(const char *arg)
{
char *r, *p, *token;
r = p = strdup(arg);
while ((token = strsep(&p, ",")) != NULL) {
if (envlist_unsetenv(envlist, token) != 0) {
usage(EXIT_FAILURE);
}
}
free(r);
}
static void handle_arg_argv0(const char *arg)
{
argv0 = strdup(arg);
}
static void handle_arg_stack_size(const char *arg)
{
char *p;
guest_stack_size = strtoul(arg, &p, 0);
if (guest_stack_size == 0) {
usage(EXIT_FAILURE);
}
if (*p == 'M') {
guest_stack_size *= MiB;
} else if (*p == 'k' || *p == 'K') {
guest_stack_size *= KiB;
}
}
static void handle_arg_ld_prefix(const char *arg)
{
interp_prefix = strdup(arg);
}
static void handle_arg_pagesize(const char *arg)
{
qemu_host_page_size = atoi(arg);
if (qemu_host_page_size == 0 ||
(qemu_host_page_size & (qemu_host_page_size - 1)) != 0) {
fprintf(stderr, "page size must be a power of two\n");
exit(EXIT_FAILURE);
}
}
static void handle_arg_seed(const char *arg)
{
seed_optarg = arg;
}
static void handle_arg_gdb(const char *arg)
{
gdbstub = g_strdup(arg);
}
static void handle_arg_uname(const char *arg)
{
qemu_uname_release = strdup(arg);
}
static void handle_arg_cpu(const char *arg)
{
cpu_model = strdup(arg);
if (cpu_model == NULL || is_help_option(cpu_model)) {
/* XXX: implement xxx_cpu_list for targets that still miss it */
#if defined(cpu_list)
cpu_list();
#endif
exit(EXIT_FAILURE);
}
}
static void handle_arg_guest_base(const char *arg)
{
guest_base = strtol(arg, NULL, 0);
have_guest_base = true;
}
static void handle_arg_reserved_va(const char *arg)
{
char *p;
int shift = 0;
reserved_va = strtoul(arg, &p, 0);
switch (*p) {
case 'k':
case 'K':
shift = 10;
break;
case 'M':
shift = 20;
break;
case 'G':
shift = 30;
break;
}
if (shift) {
unsigned long unshifted = reserved_va;
p++;
reserved_va <<= shift;
if (reserved_va >> shift != unshifted) {
fprintf(stderr, "Reserved virtual address too big\n");
exit(EXIT_FAILURE);
}
}
if (*p) {
fprintf(stderr, "Unrecognised -R size suffix '%s'\n", p);
exit(EXIT_FAILURE);
}
}
static void handle_arg_singlestep(const char *arg)
{
singlestep = 1;
}
static void handle_arg_strace(const char *arg)
{
enable_strace = true;
}
static void handle_arg_version(const char *arg)
{
printf("qemu-" TARGET_NAME " version " QEMU_FULL_VERSION
"\n" QEMU_COPYRIGHT "\n");
exit(EXIT_SUCCESS);
}
static void handle_arg_trace(const char *arg)
{
trace_opt_parse(arg);
}
#if defined(TARGET_XTENSA)
static void handle_arg_abi_call0(const char *arg)
{
xtensa_set_abi_call0();
}
#endif
static QemuPluginList plugins = QTAILQ_HEAD_INITIALIZER(plugins);
#ifdef CONFIG_PLUGIN
static void handle_arg_plugin(const char *arg)
{
qemu_plugin_opt_parse(arg, &plugins);
}
#endif
struct qemu_argument {
const char *argv;
const char *env;
bool has_arg;
void (*handle_opt)(const char *arg);
const char *example;
const char *help;
};
static const struct qemu_argument arg_table[] = {
{"h", "", false, handle_arg_help,
"", "print this help"},
{"help", "", false, handle_arg_help,
"", ""},
{"g", "QEMU_GDB", true, handle_arg_gdb,
"port", "wait gdb connection to 'port'"},
{"L", "QEMU_LD_PREFIX", true, handle_arg_ld_prefix,
"path", "set the elf interpreter prefix to 'path'"},
{"s", "QEMU_STACK_SIZE", true, handle_arg_stack_size,
"size", "set the stack size to 'size' bytes"},
{"cpu", "QEMU_CPU", true, handle_arg_cpu,
"model", "select CPU (-cpu help for list)"},
{"E", "QEMU_SET_ENV", true, handle_arg_set_env,
"var=value", "sets targets environment variable (see below)"},
{"U", "QEMU_UNSET_ENV", true, handle_arg_unset_env,
"var", "unsets targets environment variable (see below)"},
{"0", "QEMU_ARGV0", true, handle_arg_argv0,
"argv0", "forces target process argv[0] to be 'argv0'"},
{"r", "QEMU_UNAME", true, handle_arg_uname,
"uname", "set qemu uname release string to 'uname'"},
{"B", "QEMU_GUEST_BASE", true, handle_arg_guest_base,
"address", "set guest_base address to 'address'"},
{"R", "QEMU_RESERVED_VA", true, handle_arg_reserved_va,
"size", "reserve 'size' bytes for guest virtual address space"},
{"d", "QEMU_LOG", true, handle_arg_log,
"item[,...]", "enable logging of specified items "
"(use '-d help' for a list of items)"},
{"dfilter", "QEMU_DFILTER", true, handle_arg_dfilter,
"range[,...]","filter logging based on address range"},
{"D", "QEMU_LOG_FILENAME", true, handle_arg_log_filename,
"logfile", "write logs to 'logfile' (default stderr)"},
{"p", "QEMU_PAGESIZE", true, handle_arg_pagesize,
"pagesize", "set the host page size to 'pagesize'"},
{"singlestep", "QEMU_SINGLESTEP", false, handle_arg_singlestep,
"", "run in singlestep mode"},
{"strace", "QEMU_STRACE", false, handle_arg_strace,
"", "log system calls"},
{"seed", "QEMU_RAND_SEED", true, handle_arg_seed,
"", "Seed for pseudo-random number generator"},
{"trace", "QEMU_TRACE", true, handle_arg_trace,
"", "[[enable=]<pattern>][,events=<file>][,file=<file>]"},
#ifdef CONFIG_PLUGIN
{"plugin", "QEMU_PLUGIN", true, handle_arg_plugin,
"", "[file=]<file>[,arg=<string>]"},
#endif
{"version", "QEMU_VERSION", false, handle_arg_version,
"", "display version information and exit"},
#if defined(TARGET_XTENSA)
{"xtensa-abi-call0", "QEMU_XTENSA_ABI_CALL0", false, handle_arg_abi_call0,
"", "assume CALL0 Xtensa ABI"},
#endif
{NULL, NULL, false, NULL, NULL, NULL}
};
static void usage(int exitcode)
{
const struct qemu_argument *arginfo;
int maxarglen;
int maxenvlen;
printf("usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
"Linux CPU emulator (compiled for " TARGET_NAME " emulation)\n"
"\n"
"Options and associated environment variables:\n"
"\n");
/* Calculate column widths. We must always have at least enough space
* for the column header.
*/
maxarglen = strlen("Argument");
maxenvlen = strlen("Env-variable");
for (arginfo = arg_table; arginfo->handle_opt != NULL; arginfo++) {
int arglen = strlen(arginfo->argv);
if (arginfo->has_arg) {
arglen += strlen(arginfo->example) + 1;
}
if (strlen(arginfo->env) > maxenvlen) {
maxenvlen = strlen(arginfo->env);
}
if (arglen > maxarglen) {
maxarglen = arglen;
}
}
printf("%-*s %-*s Description\n", maxarglen+1, "Argument",
maxenvlen, "Env-variable");
for (arginfo = arg_table; arginfo->handle_opt != NULL; arginfo++) {
if (arginfo->has_arg) {
printf("-%s %-*s %-*s %s\n", arginfo->argv,
(int)(maxarglen - strlen(arginfo->argv) - 1),
arginfo->example, maxenvlen, arginfo->env, arginfo->help);
} else {
printf("-%-*s %-*s %s\n", maxarglen, arginfo->argv,
maxenvlen, arginfo->env,
arginfo->help);
}
}
printf("\n"
"Defaults:\n"
"QEMU_LD_PREFIX = %s\n"
"QEMU_STACK_SIZE = %ld byte\n",
interp_prefix,
guest_stack_size);
printf("\n"
"You can use -E and -U options or the QEMU_SET_ENV and\n"
"QEMU_UNSET_ENV environment variables to set and unset\n"
"environment variables for the target process.\n"
"It is possible to provide several variables by separating them\n"
"by commas in getsubopt(3) style. Additionally it is possible to\n"
"provide the -E and -U options multiple times.\n"
"The following lines are equivalent:\n"
" -E var1=val2 -E var2=val2 -U LD_PRELOAD -U LD_DEBUG\n"
" -E var1=val2,var2=val2 -U LD_PRELOAD,LD_DEBUG\n"
" QEMU_SET_ENV=var1=val2,var2=val2 QEMU_UNSET_ENV=LD_PRELOAD,LD_DEBUG\n"
"Note that if you provide several changes to a single variable\n"
"the last change will stay in effect.\n"
"\n"
QEMU_HELP_BOTTOM "\n");
exit(exitcode);
}
static int parse_args(int argc, char **argv)
{
const char *r;
int optind;
const struct qemu_argument *arginfo;
for (arginfo = arg_table; arginfo->handle_opt != NULL; arginfo++) {
if (arginfo->env == NULL) {
continue;
}
r = getenv(arginfo->env);
if (r != NULL) {
arginfo->handle_opt(r);
}
}
optind = 1;
for (;;) {
if (optind >= argc) {
break;
}
r = argv[optind];
if (r[0] != '-') {
break;
}
optind++;
r++;
if (!strcmp(r, "-")) {
break;
}
/* Treat --foo the same as -foo. */
if (r[0] == '-') {
r++;
}
for (arginfo = arg_table; arginfo->handle_opt != NULL; arginfo++) {
if (!strcmp(r, arginfo->argv)) {
if (arginfo->has_arg) {
if (optind >= argc) {
(void) fprintf(stderr,
"qemu: missing argument for option '%s'\n", r);
exit(EXIT_FAILURE);
}
arginfo->handle_opt(argv[optind]);
optind++;
} else {
arginfo->handle_opt(NULL);
}
break;
}
}
/* no option matched the current argv */
if (arginfo->handle_opt == NULL) {
(void) fprintf(stderr, "qemu: unknown option '%s'\n", r);
exit(EXIT_FAILURE);
}
}
if (optind >= argc) {
(void) fprintf(stderr, "qemu: no user program specified\n");
exit(EXIT_FAILURE);
}
exec_path = argv[optind];
return optind;
}
int main(int argc, char **argv, char **envp)
{
struct target_pt_regs regs1, *regs = &regs1;
struct image_info info1, *info = &info1;
struct linux_binprm bprm;
linux-user: fix memory leaks with NPTL emulation Running programs that create large numbers of threads, such as this snippet from libstdc++'s pthread7-rope.cc: const int max_thread_count = 4; const int max_loop_count = 10000; ... for (int j = 0; j < max_loop_count; j++) { ... for (int i = 0; i < max_thread_count; i++) pthread_create (&tid[i], NULL, thread_main, 0); for (int i = 0; i < max_thread_count; i++) pthread_join (tid[i], NULL); } in user-mode emulation will quickly run out of memory. This is caused by a failure to free memory in do_syscall prior to thread exit: /* TODO: Free CPU state. */ pthread_exit(NULL); The first step in fixing this is to make all TaskStates used by QEMU dynamically allocated. The TaskState used by the initial thread was not, as it was allocated on main's stack. So fix that, free the cpu_env, free the TaskState, and we're home free, right? Not exactly. When we create a thread, we do: ts = qemu_mallocz(sizeof(TaskState) + NEW_STACK_SIZE); ... new_stack = ts->stack; ... ret = pthread_attr_setstack(&attr, new_stack, NEW_STACK_SIZE); If we blindly free the TaskState, then, we yank the current (host) thread's stack out from underneath it while it still has things to do, like calling pthread_exit. That causes problems, as you might expect. The solution adopted here is to let the C library allocate the thread's stack (so the C library can properly clean it up at pthread_exit) and provide a hint that we want NEW_STACK_SIZE bytes of stack. With those two changes, we're done, right? Well, almost. You see, we're creating all these host threads and their parent threads never bother to check that their children are finished. There's no good place for the parent threads to do so. Therefore, we need to create the threads in a detached state so the parent thread doesn't have to call pthread_join on the child to release the child's resources; the child does so automatically. With those three major changes, we can comfortably run programs like the above without exhausting memory. We do need to delete 'stack' from the TaskState structure. Signed-off-by: Nathan Froyd <froydnj@codesourcery.com> Signed-off-by: Riku Voipio <riku.voipio@nokia.com>
2010-10-29 18:48:57 +04:00
TaskState *ts;
CPUArchState *env;
CPUState *cpu;
int optind;
char **target_environ, **wrk;
char **target_argv;
int target_argc;
int i;
int ret;
int execfd;
int log_mask;
unsigned long max_reserved_va;
linux-user: manage binfmt-misc preserve-arg[0] flag Add --preserve-argv0 in qemu-binfmt-conf.sh to configure the preserve-argv0 flag. This patch allows to use new flag in AT_FLAGS to detect if preserve-argv0 is configured for this interpreter: argv[0] (the full pathname provided by binfmt-misc) is removed and replaced by argv[1] (the original argv[0] provided by binfmt-misc when 'P'/preserve-arg[0] is set) For instance with this patch and kernel support for AT_FLAGS: $ sudo chroot m68k-chroot sh -c 'echo $0' sh without this patch: $ sudo chroot m68k-chroot sh -c 'echo $0' /usr/bin/sh The new flag is available in kernel (v5.12) since: 2347961b11d4 ("binfmt_misc: pass binfmt_misc flags to the interpreter") This can be tested with something like: # cp ..../qemu-ppc /chroot/powerpc/jessie # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 yes # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: POC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' sh # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 no # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: OC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' /bin/sh Signed-off-by: Laurent Vivier <laurent@vivier.eu> Message-Id: <20210222105004.1642234-1-laurent@vivier.eu>
2021-02-22 13:50:04 +03:00
bool preserve_argv0;
error_init(argv[0]);
module_call_init(MODULE_INIT_TRACE);
qemu_init_cpu_list();
module_call_init(MODULE_INIT_QOM);
envlist = envlist_create();
/* add current environment into the list */
for (wrk = environ; *wrk != NULL; wrk++) {
(void) envlist_setenv(envlist, *wrk);
}
/* Read the stack limit from the kernel. If it's "unlimited",
then we can do little else besides use the default. */
{
struct rlimit lim;
if (getrlimit(RLIMIT_STACK, &lim) == 0
&& lim.rlim_cur != RLIM_INFINITY
&& lim.rlim_cur == (target_long)lim.rlim_cur) {
guest_stack_size = lim.rlim_cur;
}
}
cpu_model = NULL;
Add cpu model configuration support.. This is a reimplementation of prior versions which adds the ability to define cpu models for contemporary processors. The added models are likewise selected via -cpu <name>, and are intended to displace the existing convention of "-cpu qemu64" augmented with a series of feature flags. A primary motivation was determination of a least common denominator within a given processor class to simplify guest migration. It is still possible to modify an arbitrary model via additional feature flags however the goal here was to make doing so unnecessary in typical usage. The other consideration was providing models names reflective of current processors. Both AMD and Intel have reviewed the models in terms of balancing generality of migration vs. excessive feature downgrade relative to released silicon. This version of the patch replaces the prior hard wired definitions with a configuration file approach for new models. Existing models are thus far left as-is but may easily be transitioned to (or may be overridden by) the configuration file representation. Proposed new model definitions are provided here for current AMD and Intel processors. Each model consists of a name used to select it on the command line (-cpu <name>), and a model_id which corresponds to a least common denominator commercial instance of the processor class. A table of names/model_ids may be queried via "-cpu ?model": : x86 Opteron_G3 AMD Opteron 23xx (Gen 3 Class Opteron) x86 Opteron_G2 AMD Opteron 22xx (Gen 2 Class Opteron) x86 Opteron_G1 AMD Opteron 240 (Gen 1 Class Opteron) x86 Nehalem Intel Core i7 9xx (Nehalem Class Core i7) x86 Penryn Intel Core 2 Duo P9xxx (Penryn Class Core 2) x86 Conroe Intel Celeron_4x0 (Conroe/Merom Class Core 2) : Also added is "-cpu ?dump" which exhaustively outputs all config data for all defined models, and "-cpu ?cpuid" which enumerates all qemu recognized CPUID feature flags. The pseudo cpuid flag 'check' when added to the feature flag list will warn when feature flags (either implicit in a cpu model or explicit on the command line) would have otherwise been quietly unavailable to a guest: # qemu-system-x86_64 ... -cpu Nehalem,check warning: host cpuid 0000_0001 lacks requested flag 'sse4.2|sse4_2' [0x00100000] warning: host cpuid 0000_0001 lacks requested flag 'popcnt' [0x00800000] A similar 'enforce' pseudo flag exists which in addition to the above causes qemu to error exit if requested flags are unavailable. Configuration data for a cpu model resides in the target config file which by default will be installed as: /usr/local/etc/qemu/target-<arch>.conf The format of this file should be self explanatory given the definitions for the above six models and essentially mimics the structure of the static x86_def_t x86_defs. Encoding of cpuid flags names now allows aliases for both the configuration file and the command line which reconciles some Intel/AMD/Linux/Qemu naming differences. This patch was tested relative to qemu.git. Signed-off-by: john cooper <john.cooper@redhat.com> Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
2010-02-20 20:14:59 +03:00
qemu_add_opts(&qemu_trace_opts);
qemu_plugin_add_opts();
optind = parse_args(argc, argv);
log_mask = last_log_mask | (enable_strace ? LOG_STRACE : 0);
if (log_mask) {
qemu_log_needs_buffers();
qemu_set_log(log_mask);
}
if (!trace_init_backends()) {
exit(1);
}
trace_init_file();
qemu_plugin_load_list(&plugins, &error_fatal);
/* Zero out regs */
memset(regs, 0, sizeof(struct target_pt_regs));
/* Zero out image_info */
memset(info, 0, sizeof(struct image_info));
memset(&bprm, 0, sizeof (bprm));
/* Scan interp_prefix dir for replacement files. */
init_paths(interp_prefix);
init_qemu_uname_release();
linux-user: manage binfmt-misc preserve-arg[0] flag Add --preserve-argv0 in qemu-binfmt-conf.sh to configure the preserve-argv0 flag. This patch allows to use new flag in AT_FLAGS to detect if preserve-argv0 is configured for this interpreter: argv[0] (the full pathname provided by binfmt-misc) is removed and replaced by argv[1] (the original argv[0] provided by binfmt-misc when 'P'/preserve-arg[0] is set) For instance with this patch and kernel support for AT_FLAGS: $ sudo chroot m68k-chroot sh -c 'echo $0' sh without this patch: $ sudo chroot m68k-chroot sh -c 'echo $0' /usr/bin/sh The new flag is available in kernel (v5.12) since: 2347961b11d4 ("binfmt_misc: pass binfmt_misc flags to the interpreter") This can be tested with something like: # cp ..../qemu-ppc /chroot/powerpc/jessie # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 yes # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: POC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' sh # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 no # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: OC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' /bin/sh Signed-off-by: Laurent Vivier <laurent@vivier.eu> Message-Id: <20210222105004.1642234-1-laurent@vivier.eu>
2021-02-22 13:50:04 +03:00
/*
* Manage binfmt-misc open-binary flag
*/
execfd = qemu_getauxval(AT_EXECFD);
if (execfd == 0) {
execfd = open(exec_path, O_RDONLY);
if (execfd < 0) {
printf("Error while loading %s: %s\n", exec_path, strerror(errno));
_exit(EXIT_FAILURE);
}
}
linux-user: manage binfmt-misc preserve-arg[0] flag Add --preserve-argv0 in qemu-binfmt-conf.sh to configure the preserve-argv0 flag. This patch allows to use new flag in AT_FLAGS to detect if preserve-argv0 is configured for this interpreter: argv[0] (the full pathname provided by binfmt-misc) is removed and replaced by argv[1] (the original argv[0] provided by binfmt-misc when 'P'/preserve-arg[0] is set) For instance with this patch and kernel support for AT_FLAGS: $ sudo chroot m68k-chroot sh -c 'echo $0' sh without this patch: $ sudo chroot m68k-chroot sh -c 'echo $0' /usr/bin/sh The new flag is available in kernel (v5.12) since: 2347961b11d4 ("binfmt_misc: pass binfmt_misc flags to the interpreter") This can be tested with something like: # cp ..../qemu-ppc /chroot/powerpc/jessie # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 yes # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: POC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' sh # qemu-binfmt-conf.sh --qemu-path / --systemd ppc --credential yes \ --persistent no --preserve-argv0 no # systemctl restart systemd-binfmt.service # cat /proc/sys/fs/binfmt_misc/qemu-ppc enabled interpreter //qemu-ppc flags: OC offset 0 magic 7f454c4601020100000000000000000000020014 mask ffffffffffffff00fffffffffffffffffffeffff # chroot /chroot/powerpc/jessie sh -c 'echo $0' /bin/sh Signed-off-by: Laurent Vivier <laurent@vivier.eu> Message-Id: <20210222105004.1642234-1-laurent@vivier.eu>
2021-02-22 13:50:04 +03:00
/*
* get binfmt_misc flags
*/
preserve_argv0 = !!(qemu_getauxval(AT_FLAGS) & AT_FLAGS_PRESERVE_ARGV0);
/*
* Manage binfmt-misc preserve-arg[0] flag
* argv[optind] full path to the binary
* argv[optind + 1] original argv[0]
*/
if (optind + 1 < argc && preserve_argv0) {
optind++;
}
if (cpu_model == NULL) {
cpu_model = cpu_get_model(get_elf_eflags(execfd));
}
cpu_type = parse_cpu_option(cpu_model);
/* init tcg before creating CPUs and to get qemu_host_page_size */
{
AccelClass *ac = ACCEL_GET_CLASS(current_accel());
ac->init_machine(NULL);
accel_init_interfaces(ac);
}
cpu = cpu_create(cpu_type);
cpu: Make cpu_init() return QOM CPUState object Instead of making cpu_init() return CPUArchState, return CPUState. Changes were made using the Coccinelle semantic patch below. @@ typedef CPUState; identifier e; expression args; type CPUArchState; @@ - e = + cpu = cpu_init(args); - if (!e) { + if (!cpu) { ... } - cpu = ENV_GET_CPU(env); + e = cpu->env_ptr; @@ identifier new_env, new_cpu, env, cpu; type CPUArchState; expression args; @@ -{ - CPUState *cpu = ENV_GET_CPU(env); - CPUArchState *new_env = cpu_init(args); - CPUState *new_cpu = ENV_GET_CPU(new_env); +{ + CPUState *cpu = ENV_GET_CPU(env); + CPUState *new_cpu = cpu_init(args); + CPUArchState *new_env = new_cpu->env_ptr; ... } @@ identifier c, cpu_init_func, cpu_model; type StateType, CPUType; @@ -static inline StateType* cpu_init(const char *cpu_model) -{ - CPUType *c = cpu_init_func(cpu_model); ( - if (c == NULL) { - return NULL; - } - return &c->env; | - if (c) { - return &c->env; - } - return NULL; ) -} +#define cpu_init(cpu_model) CPU(cpu_init_func(cpu_model)) @@ identifier cpu_init_func; identifier model; @@ -#define cpu_init(model) (&cpu_init_func(model)->env) +#define cpu_init(model) CPU(cpu_init_func(model)) Signed-off-by: Eduardo Habkost <ehabkost@redhat.com> Cc: Blue Swirl <blauwirbel@gmail.com> Cc: Guan Xuetao <gxt@mprc.pku.edu.cn> Cc: Riku Voipio <riku.voipio@iki.fi> Cc: Richard Henderson <rth@twiddle.net> Cc: Peter Maydell <peter.maydell@linaro.org> Cc: "Edgar E. Iglesias" <edgar.iglesias@gmail.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Michael Walle <michael@walle.cc> Cc: Aurelien Jarno <aurelien@aurel32.net> Cc: Leon Alrae <leon.alrae@imgtec.com> Cc: Anthony Green <green@moxielogic.com> Cc: Jia Liu <proljc@gmail.com> Cc: Alexander Graf <agraf@suse.de> Cc: Bastian Koppelmann <kbastian@mail.uni-paderborn.de> Cc: Max Filippov <jcmvbkbc@gmail.com> [AF: Fixed up cpu_copy() manually] Signed-off-by: Andreas Färber <afaerber@suse.de>
2015-02-26 23:37:49 +03:00
env = cpu->env_ptr;
cpu_reset(cpu);
thread_cpu = cpu;
/*
* Reserving too much vm space via mmap can run into problems
* with rlimits, oom due to page table creation, etc. We will
* still try it, if directed by the command-line option, but
* not by default.
*/
max_reserved_va = MAX_RESERVED_VA(cpu);
if (reserved_va != 0) {
if (max_reserved_va && reserved_va > max_reserved_va) {
fprintf(stderr, "Reserved virtual address too big\n");
exit(EXIT_FAILURE);
}
} else if (HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32) {
/*
* reserved_va must be aligned with the host page size
* as it is used with mmap()
*/
reserved_va = max_reserved_va & qemu_host_page_mask;
}
{
Error *err = NULL;
if (seed_optarg != NULL) {
qemu_guest_random_seed_main(seed_optarg, &err);
} else {
qcrypto_init(&err);
}
if (err) {
error_reportf_err(err, "cannot initialize crypto: ");
exit(1);
}
}
target_environ = envlist_to_environ(envlist, NULL);
envlist_free(envlist);
/*
* Read in mmap_min_addr kernel parameter. This value is used
* When loading the ELF image to determine whether guest_base
* is needed. It is also used in mmap_find_vma.
*/
{
FILE *fp;
if ((fp = fopen("/proc/sys/vm/mmap_min_addr", "r")) != NULL) {
unsigned long tmp;
if (fscanf(fp, "%lu", &tmp) == 1 && tmp != 0) {
mmap_min_addr = tmp;
qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx\n",
mmap_min_addr);
}
fclose(fp);
}
}
/*
* We prefer to not make NULL pointers accessible to QEMU.
* If we're in a chroot with no /proc, fall back to 1 page.
*/
if (mmap_min_addr == 0) {
mmap_min_addr = qemu_host_page_size;
qemu_log_mask(CPU_LOG_PAGE,
"host mmap_min_addr=0x%lx (fallback)\n",
mmap_min_addr);
}
/*
* Prepare copy of argv vector for target.
*/
target_argc = argc - optind;
target_argv = calloc(target_argc + 1, sizeof (char *));
if (target_argv == NULL) {
avoid TABs in files that only contain a few Most files that have TABs only contain a handful of them. Change them to spaces so that we don't confuse people. disas, standard-headers, linux-headers and libdecnumber are imported from other projects and probably should be exempted from the check. Outside those, after this patch the following files still contain both 8-space and TAB sequences at the beginning of the line. Many of them have a majority of TABs, or were initially committed with all tabs. bsd-user/i386/target_syscall.h bsd-user/x86_64/target_syscall.h crypto/aes.c hw/audio/fmopl.c hw/audio/fmopl.h hw/block/tc58128.c hw/display/cirrus_vga.c hw/display/xenfb.c hw/dma/etraxfs_dma.c hw/intc/sh_intc.c hw/misc/mst_fpga.c hw/net/pcnet.c hw/sh4/sh7750.c hw/timer/m48t59.c hw/timer/sh_timer.c include/crypto/aes.h include/disas/bfd.h include/hw/sh4/sh.h libdecnumber/decNumber.c linux-headers/asm-generic/unistd.h linux-headers/linux/kvm.h linux-user/alpha/target_syscall.h linux-user/arm/nwfpe/double_cpdo.c linux-user/arm/nwfpe/fpa11_cpdt.c linux-user/arm/nwfpe/fpa11_cprt.c linux-user/arm/nwfpe/fpa11.h linux-user/flat.h linux-user/flatload.c linux-user/i386/target_syscall.h linux-user/ppc/target_syscall.h linux-user/sparc/target_syscall.h linux-user/syscall.c linux-user/syscall_defs.h linux-user/x86_64/target_syscall.h slirp/cksum.c slirp/if.c slirp/ip.h slirp/ip_icmp.c slirp/ip_icmp.h slirp/ip_input.c slirp/ip_output.c slirp/mbuf.c slirp/misc.c slirp/sbuf.c slirp/socket.c slirp/socket.h slirp/tcp_input.c slirp/tcpip.h slirp/tcp_output.c slirp/tcp_subr.c slirp/tcp_timer.c slirp/tftp.c slirp/udp.c slirp/udp.h target/cris/cpu.h target/cris/mmu.c target/cris/op_helper.c target/sh4/helper.c target/sh4/op_helper.c target/sh4/translate.c tcg/sparc/tcg-target.inc.c tests/tcg/cris/check_addo.c tests/tcg/cris/check_moveq.c tests/tcg/cris/check_swap.c tests/tcg/multiarch/test-mmap.c ui/vnc-enc-hextile-template.h ui/vnc-enc-zywrle.h util/envlist.c util/readline.c The following have only TABs: bsd-user/i386/target_signal.h bsd-user/sparc64/target_signal.h bsd-user/sparc64/target_syscall.h bsd-user/sparc/target_signal.h bsd-user/sparc/target_syscall.h bsd-user/x86_64/target_signal.h crypto/desrfb.c hw/audio/intel-hda-defs.h hw/core/uboot_image.h hw/sh4/sh7750_regnames.c hw/sh4/sh7750_regs.h include/hw/cris/etraxfs_dma.h linux-user/alpha/termbits.h linux-user/arm/nwfpe/fpopcode.h linux-user/arm/nwfpe/fpsr.h linux-user/arm/syscall_nr.h linux-user/arm/target_signal.h linux-user/cris/target_signal.h linux-user/i386/target_signal.h linux-user/linux_loop.h linux-user/m68k/target_signal.h linux-user/microblaze/target_signal.h linux-user/mips64/target_signal.h linux-user/mips/target_signal.h linux-user/mips/target_syscall.h linux-user/mips/termbits.h linux-user/ppc/target_signal.h linux-user/sh4/target_signal.h linux-user/sh4/termbits.h linux-user/sparc64/target_syscall.h linux-user/sparc/target_signal.h linux-user/x86_64/target_signal.h linux-user/x86_64/termbits.h pc-bios/optionrom/optionrom.h slirp/mbuf.h slirp/misc.h slirp/sbuf.h slirp/tcp.h slirp/tcp_timer.h slirp/tcp_var.h target/i386/svm.h target/sparc/asi.h target/xtensa/core-dc232b/xtensa-modules.inc.c target/xtensa/core-dc233c/xtensa-modules.inc.c target/xtensa/core-de212/core-isa.h target/xtensa/core-de212/xtensa-modules.inc.c target/xtensa/core-fsf/xtensa-modules.inc.c target/xtensa/core-sample_controller/core-isa.h target/xtensa/core-sample_controller/xtensa-modules.inc.c target/xtensa/core-test_kc705_be/core-isa.h target/xtensa/core-test_kc705_be/xtensa-modules.inc.c tests/tcg/cris/check_abs.c tests/tcg/cris/check_addc.c tests/tcg/cris/check_addcm.c tests/tcg/cris/check_addoq.c tests/tcg/cris/check_bound.c tests/tcg/cris/check_ftag.c tests/tcg/cris/check_int64.c tests/tcg/cris/check_lz.c tests/tcg/cris/check_openpf5.c tests/tcg/cris/check_sigalrm.c tests/tcg/cris/crisutils.h tests/tcg/cris/sys.c tests/tcg/i386/test-i386-ssse3.c ui/vgafont.h Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Message-Id: <20181213223737.11793-3-pbonzini@redhat.com> Reviewed-by: Aleksandar Markovic <amarkovic@wavecomp.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Reviewed-by: Wainer dos Santos Moschetta <wainersm@redhat.com> Acked-by: Richard Henderson <richard.henderson@linaro.org> Acked-by: Eric Blake <eblake@redhat.com> Acked-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Stefan Markovic <smarkovic@wavecomp.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2018-12-14 01:37:37 +03:00
(void) fprintf(stderr, "Unable to allocate memory for target_argv\n");
exit(EXIT_FAILURE);
}
/*
* If argv0 is specified (using '-0' switch) we replace
* argv[0] pointer with the given one.
*/
i = 0;
if (argv0 != NULL) {
target_argv[i++] = strdup(argv0);
}
for (; i < target_argc; i++) {
target_argv[i] = strdup(argv[optind + i]);
}
target_argv[target_argc] = NULL;
ts = g_new0(TaskState, 1);
init_task_state(ts);
/* build Task State */
ts->info = info;
ts->bprm = &bprm;
cpu->opaque = ts;
task_settid(ts);
ret = loader_exec(execfd, exec_path, target_argv, target_environ, regs,
info, &bprm);
if (ret != 0) {
printf("Error while loading %s: %s\n", exec_path, strerror(-ret));
_exit(EXIT_FAILURE);
}
for (wrk = target_environ; *wrk; wrk++) {
g_free(*wrk);
}
g_free(target_environ);
if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
qemu_log("guest_base %p\n", (void *)guest_base);
log_page_dump("binary load");
qemu_log("start_brk 0x" TARGET_ABI_FMT_lx "\n", info->start_brk);
qemu_log("end_code 0x" TARGET_ABI_FMT_lx "\n", info->end_code);
qemu_log("start_code 0x" TARGET_ABI_FMT_lx "\n", info->start_code);
qemu_log("start_data 0x" TARGET_ABI_FMT_lx "\n", info->start_data);
qemu_log("end_data 0x" TARGET_ABI_FMT_lx "\n", info->end_data);
qemu_log("start_stack 0x" TARGET_ABI_FMT_lx "\n", info->start_stack);
qemu_log("brk 0x" TARGET_ABI_FMT_lx "\n", info->brk);
qemu_log("entry 0x" TARGET_ABI_FMT_lx "\n", info->entry);
qemu_log("argv_start 0x" TARGET_ABI_FMT_lx "\n", info->arg_start);
qemu_log("env_start 0x" TARGET_ABI_FMT_lx "\n",
info->arg_end + (abi_ulong)sizeof(abi_ulong));
qemu_log("auxv_start 0x" TARGET_ABI_FMT_lx "\n", info->saved_auxv);
}
target_set_brk(info->brk);
syscall_init();
signal_init();
/* Now that we've loaded the binary, GUEST_BASE is fixed. Delay
generating the prologue until now so that the prologue can take
the real value of GUEST_BASE into account. */
tcg_prologue_init(tcg_ctx);
tcg: introduce regions to split code_gen_buffer This is groundwork for supporting multiple TCG contexts. The naive solution here is to split code_gen_buffer statically among the TCG threads; this however results in poor utilization if translation needs are different across TCG threads. What we do here is to add an extra layer of indirection, assigning regions that act just like pages do in virtual memory allocation. (BTW if you are wondering about the chosen naming, I did not want to use blocks or pages because those are already heavily used in QEMU). We use a global lock to serialize allocations as well as statistics reporting (we now export the size of the used code_gen_buffer with tcg_code_size()). Note that for the allocator we could just use a counter and atomic_inc; however, that would complicate the gathering of tcg_code_size()-like stats. So given that the region operations are not a fast path, a lock seems the most reasonable choice. The effectiveness of this approach is clear after seeing some numbers. I used the bootup+shutdown of debian-arm with '-tb-size 80' as a benchmark. Note that I'm evaluating this after enabling per-thread TCG (which is done by a subsequent commit). * -smp 1, 1 region (entire buffer): qemu: flush code_size=83885014 nb_tbs=154739 avg_tb_size=357 qemu: flush code_size=83884902 nb_tbs=153136 avg_tb_size=363 qemu: flush code_size=83885014 nb_tbs=152777 avg_tb_size=364 qemu: flush code_size=83884950 nb_tbs=150057 avg_tb_size=373 qemu: flush code_size=83884998 nb_tbs=150234 avg_tb_size=373 qemu: flush code_size=83885014 nb_tbs=154009 avg_tb_size=360 qemu: flush code_size=83885014 nb_tbs=151007 avg_tb_size=370 qemu: flush code_size=83885014 nb_tbs=151816 avg_tb_size=367 That is, 8 flushes. * -smp 8, 32 regions (80/32 MB per region) [i.e. this patch]: qemu: flush code_size=76328008 nb_tbs=141040 avg_tb_size=356 qemu: flush code_size=75366534 nb_tbs=138000 avg_tb_size=361 qemu: flush code_size=76864546 nb_tbs=140653 avg_tb_size=361 qemu: flush code_size=76309084 nb_tbs=135945 avg_tb_size=375 qemu: flush code_size=74581856 nb_tbs=132909 avg_tb_size=375 qemu: flush code_size=73927256 nb_tbs=135616 avg_tb_size=360 qemu: flush code_size=78629426 nb_tbs=142896 avg_tb_size=365 qemu: flush code_size=76667052 nb_tbs=138508 avg_tb_size=368 Again, 8 flushes. Note how buffer utilization is not 100%, but it is close. Smaller region sizes would yield higher utilization, but we want region allocation to be rare (it acquires a lock), so we do not want to go too small. * -smp 8, static partitioning of 8 regions (10 MB per region): qemu: flush code_size=21936504 nb_tbs=40570 avg_tb_size=354 qemu: flush code_size=11472174 nb_tbs=20633 avg_tb_size=370 qemu: flush code_size=11603976 nb_tbs=21059 avg_tb_size=365 qemu: flush code_size=23254872 nb_tbs=41243 avg_tb_size=377 qemu: flush code_size=28289496 nb_tbs=52057 avg_tb_size=358 qemu: flush code_size=43605160 nb_tbs=78896 avg_tb_size=367 qemu: flush code_size=45166552 nb_tbs=82158 avg_tb_size=364 qemu: flush code_size=63289640 nb_tbs=116494 avg_tb_size=358 qemu: flush code_size=51389960 nb_tbs=93937 avg_tb_size=362 qemu: flush code_size=59665928 nb_tbs=107063 avg_tb_size=372 qemu: flush code_size=38380824 nb_tbs=68597 avg_tb_size=374 qemu: flush code_size=44884568 nb_tbs=79901 avg_tb_size=376 qemu: flush code_size=50782632 nb_tbs=90681 avg_tb_size=374 qemu: flush code_size=39848888 nb_tbs=71433 avg_tb_size=372 qemu: flush code_size=64708840 nb_tbs=119052 avg_tb_size=359 qemu: flush code_size=49830008 nb_tbs=90992 avg_tb_size=362 qemu: flush code_size=68372408 nb_tbs=123442 avg_tb_size=368 qemu: flush code_size=33555560 nb_tbs=59514 avg_tb_size=378 qemu: flush code_size=44748344 nb_tbs=80974 avg_tb_size=367 qemu: flush code_size=37104248 nb_tbs=67609 avg_tb_size=364 That is, 20 flushes. Note how a static partitioning approach uses the code buffer poorly, leading to many unnecessary flushes. Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Signed-off-by: Emilio G. Cota <cota@braap.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2017-07-08 02:24:20 +03:00
tcg_region_init();
target_cpu_copy_regs(env, regs);
if (gdbstub) {
if (gdbserver_start(gdbstub) < 0) {
fprintf(stderr, "qemu: could not open gdbserver on %s\n",
gdbstub);
exit(EXIT_FAILURE);
}
gdb_handlesig(cpu, 0);
}
cpu_loop(env);
/* never exits */
return 0;
}