qemu/util/oslib-posix.c
Paolo Bonzini 7dda5dc82a migration: initialize RAM to zero
Using qemu_memalign only leaves the RAM zero by chance, because libc
will usually use mmap to satisfy our huge requests.  But memory will
not be zero when using MALLOC_PERTURB_ with a nonzero value.  In the
case of incoming migration, this breaks a recently-introduced
invariant (commit f1c7279, migration: do not sent zero pages in
bulk stage, 2013-03-26).

To fix this, use mmap ourselves to get a well-aligned, always zero
block for the RAM.  Mmap-ed memory is easy to "trim" at the sides.

This also removes the need to do something special on valgrind
(see commit c2a8238a, Support running QEMU on Valgrind, 2011-10-31),
thus effectively reverts that patch.

Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Markus Armbruster <armbru@redhat.com>
Message-id: 1365522223-20153-1-git-send-email-pbonzini@redhat.com
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>
2013-04-16 16:10:20 -05:00

227 lines
5.9 KiB
C

/*
* oslib-posix.c
*
* Copyright (c) 2003-2008 Fabrice Bellard
* Copyright (c) 2010 Red Hat, Inc.
*
* QEMU library functions on POSIX which are shared between QEMU and
* the QEMU tools.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/* The following block of code temporarily renames the daemon() function so the
compiler does not see the warning associated with it in stdlib.h on OSX */
#ifdef __APPLE__
#define daemon qemu_fake_daemon_function
#include <stdlib.h>
#undef daemon
extern int daemon(int, int);
#endif
#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
/* Use 2 MiB alignment so transparent hugepages can be used by KVM.
Valgrind does not support alignments larger than 1 MiB, but qemu_vmalloc()
obtains its alignment by trimming an mmap()ed region itself, so no
Valgrind-specific code is needed. */
# define QEMU_VMALLOC_ALIGN (512 * 4096)
#elif defined(__linux__) && defined(__s390x__)
/* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
# define QEMU_VMALLOC_ALIGN (256 * 4096)
#else
# define QEMU_VMALLOC_ALIGN getpagesize()
#endif
#include "config-host.h"
#include "sysemu/sysemu.h"
#include "trace.h"
#include "qemu/sockets.h"
#include <sys/mman.h>
#ifdef CONFIG_LINUX
#include <sys/syscall.h>
#endif
/*
 * Return an identifier for the calling thread.
 *
 * When __linux__ is defined the value comes from the gettid system call;
 * on every other host the process id from getpid() is returned instead.
 */
int qemu_get_thread_id(void)
{
#if !defined(__linux__)
    return getpid();
#else
    return syscall(SYS_gettid);
#endif
}
/*
 * Thin wrapper around the C library's daemon().
 *
 * @nochdir and @noclose are forwarded unchanged, and whatever daemon()
 * returns is handed back to the caller.
 */
int qemu_daemon(int nochdir, int noclose)
{
    int status = daemon(nochdir, noclose);

    return status;
}
/*
 * Abort the process if an allocation came back as NULL.
 *
 * A non-NULL @ptr is returned untouched, so the call can wrap an allocation
 * expression.  For NULL, a message built from the current errno is written
 * to stderr and abort() is called.
 */
void *qemu_oom_check(void *ptr)
{
    if (ptr != NULL) {
        return ptr;
    }

    fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
    abort();
}
/*
 * Allocate @size bytes aligned to @alignment; does not return on failure.
 *
 * The allocator is selected at compile time:
 *  - posix_memalign() when _POSIX_C_SOURCE is defined and __sun__ is not;
 *  - otherwise valloc() when CONFIG_BSD is defined (@alignment is not
 *    passed to it);
 *  - otherwise memalign().
 * A failed allocation prints a diagnostic to stderr and aborts.  The result
 * is reported through trace_qemu_memalign() before it is returned.
 */
void *qemu_memalign(size_t alignment, size_t size)
{
    void *block;
#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
    int err = posix_memalign(&block, alignment, size);

    if (err != 0) {
        /* The return value itself is the error code fed to strerror(). */
        fprintf(stderr, "Failed to allocate %zu B: %s\n",
                size, strerror(err));
        abort();
    }
#elif defined(CONFIG_BSD)
    block = qemu_oom_check(valloc(size));
#else
    block = qemu_oom_check(memalign(alignment, size));
#endif
    trace_qemu_memalign(alignment, size, block);
    return block;
}
/*
 * Allocate @size bytes of anonymous, private, read/write memory whose start
 * address is aligned to QEMU_VMALLOC_ALIGN.
 *
 * mmap() only guarantees page alignment, so the request is enlarged by
 * (QEMU_VMALLOC_ALIGN - page size) bytes.  An aligned start address is then
 * chosen inside the mapping and the unused head and tail are unmapped again.
 * Anonymous mappings start out zero-filled, so the returned memory is zero.
 *
 * Prints a diagnostic to stderr and aborts if mmap() fails; never returns
 * NULL.  The result is reported through trace_qemu_vmalloc().
 */
void *qemu_vmalloc(size_t size)
{
    size_t align = QEMU_VMALLOC_ALIGN;
    size_t pagesize = getpagesize();
    size_t total = size + align - pagesize;
    /*
     * munmap() wants a page-aligned address, so the tail to be trimmed has
     * to start at the first page boundary at or after the end of the block.
     */
    size_t used = QEMU_ALIGN_UP(size, pagesize);
    size_t offset;
    void *base;
    char *ptr;

    base = mmap(0, total, PROT_READ | PROT_WRITE,
                MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    /* Check for failure before doing any arithmetic on the result. */
    if (base == MAP_FAILED) {
        fprintf(stderr, "Failed to allocate %zu B: %s\n",
                size, strerror(errno));
        abort();
    }

    offset = QEMU_ALIGN_UP((uintptr_t)base, align) - (uintptr_t)base;
    ptr = (char *)base + offset;
    total -= offset;

    /* Give back the unaligned head of the mapping. */
    if (offset > 0) {
        munmap(base, offset);
    }
    /* Give back whole pages beyond the end of the requested block. */
    if (total > used) {
        munmap(ptr + used, total - used);
    }

    trace_qemu_vmalloc(size, ptr);
    return ptr;
}
/*
 * Release @ptr with free(), after reporting it via trace_qemu_vfree().
 *
 * NOTE(review): qemu_vmalloc() in this file returns memory obtained with
 * mmap(), which must not be handed to free().  Confirm that callers only
 * pass malloc-family pointers (e.g. from qemu_memalign()) to this function.
 */
void qemu_vfree(void *ptr)
{
    /* Trace first, so the event is emitted while the pointer is still live. */
    trace_qemu_vfree(ptr);
    free(ptr);
}
/*
 * Put @fd into blocking mode by clearing O_NONBLOCK from its file status
 * flags.  The results of the two fcntl() calls are not checked.
 */
void qemu_set_block(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    flags &= ~O_NONBLOCK;
    fcntl(fd, F_SETFL, flags);
}
/*
 * Put @fd into non-blocking mode by adding O_NONBLOCK to its file status
 * flags.  The results of the two fcntl() calls are not checked.
 */
void qemu_set_nonblock(int fd)
{
    int flags = fcntl(fd, F_GETFL);

    flags |= O_NONBLOCK;
    fcntl(fd, F_SETFL, flags);
}
/*
 * Mark @fd close-on-exec by adding FD_CLOEXEC to its descriptor flags.
 * The results of the two fcntl() calls are not checked.
 */
void qemu_set_cloexec(int fd)
{
    int fd_flags = fcntl(fd, F_GETFD);

    fd_flags |= FD_CLOEXEC;
    fcntl(fd, F_SETFD, fd_flags);
}
/*
 * Create a pipe whose two descriptors both have FD_CLOEXEC set.
 *
 * When CONFIG_PIPE2 is defined, pipe2() with O_CLOEXEC is tried first and
 * its result is final unless it failed with ENOSYS.  Otherwise (or after
 * ENOSYS) the pipe is created with pipe() and, on success, each end is
 * marked through qemu_set_cloexec().
 *
 * Returns the value returned by pipe2()/pipe().
 */
int qemu_pipe(int pipefd[2])
{
    int rc;

#ifdef CONFIG_PIPE2
    rc = pipe2(pipefd, O_CLOEXEC);
    if (!(rc == -1 && errno == ENOSYS)) {
        return rc;
    }
#endif
    rc = pipe(pipefd);
    if (rc != 0) {
        return rc;
    }

    qemu_set_cloexec(pipefd[0]);
    qemu_set_cloexec(pipefd[1]);
    return rc;
}
/*
 * Set the access (times[0]) and modification (times[1]) timestamps of @path.
 *
 * When CONFIG_UTIMENSAT is defined, utimensat() with AT_SYMLINK_NOFOLLOW is
 * tried first and its result is final unless it failed with ENOSYS.
 *
 * The fallback emulates the call with utimes():
 *  - both entries UTIME_OMIT: nothing to do, returns 0;
 *  - both entries UTIME_NOW:  utimes(path, NULL);
 *  - otherwise each entry is converted to a struct timeval.  UTIME_NOW takes
 *    the current time from gettimeofday(); UTIME_OMIT re-uses the value
 *    currently reported by stat() with the microsecond part set to 0;
 *    explicit values are truncated from nanoseconds to microseconds.
 *
 * Returns 0 on success and -1 on failure (errno as left by the failing
 * call).
 */
int qemu_utimens(const char *path, const struct timespec *times)
{
    struct timeval tv[2], tv_now;
    struct stat st;
    int i;
#ifdef CONFIG_UTIMENSAT
    int ret;

    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
    if (ret != -1 || errno != ENOSYS) {
        return ret;
    }
#endif
    /* Fallback: use utimes() instead of utimensat() */

    /* happy if special cases */
    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
        return 0;
    }
    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
        return utimes(path, NULL);
    }

    /* prepare for hard cases */
    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
        gettimeofday(&tv_now, NULL);
    }
    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
        /*
         * The omitted timestamp is copied out of st below; if stat() fails
         * st is uninitialised, so report the error instead of using it.
         */
        if (stat(path, &st) < 0) {
            return -1;
        }
    }

    for (i = 0; i < 2; i++) {
        if (times[i].tv_nsec == UTIME_NOW) {
            tv[i].tv_sec = tv_now.tv_sec;
            tv[i].tv_usec = tv_now.tv_usec;
        } else if (times[i].tv_nsec == UTIME_OMIT) {
            /* Keep the existing value: index 0 is atime, index 1 is mtime. */
            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
            tv[i].tv_usec = 0;
        } else {
            tv[i].tv_sec = times[i].tv_sec;
            tv[i].tv_usec = times[i].tv_nsec / 1000;
        }
    }

    return utimes(path, &tv[0]);
}