From 4a5457616d5b3116a8ae76748f9c493a4b9e07f0 Mon Sep 17 00:00:00 2001 From: Shivaprasad G Bhat Date: Fri, 13 Jul 2018 07:34:46 -0500 Subject: [PATCH 1/4] linux-user: ppc64: use the correct values for F_*LK64s Qemu includes the glibc headers for the host defines and target headers are part of the qemu source themselves. The glibc has the F_GETLK64, F_SETLK64 and F_SETLKW64 defined to 12, 13 and 14 for all archs in sysdeps/unix/sysv/linux/bits/fcntl-linux.h. The linux kernel generic definition for F_*LK is 5, 6 & 7 and F_*LK64* is 12, 13 and 14 as seen in include/uapi/asm-generic/fcntl.h. On 64bit machine, by default the kernel assumes all F_*LK to 64bit calls and doesn't support use of F_*LK64* as can be seen in include/linux/fcntl.h in linux source. On x86_64 host, the values for F_*LK64* are set to 5, 6 and 7 explicitly in /usr/include/x86_64-linux-gnu/bits/fcntl.h by the glibc. Whereas, a PPC64 host doesn't have such a definition in /usr/include/powerpc64le-linux-gnu/bits/fcntl.h by the glibc. So, the sources on PPC64 host see the default value of F_*LK64* as 12, 13 & 14 (fcntl-linux.h). Since the 64bit kernel doesn't support 12, 13 & 14; the glibc fcntl syscall implementation (__libc_fcntl*(), __fcntl64_nocancel) does the F_*LK64* value conversion back to F_*LK* values on PPC64 as seen in sysdeps/unix/sysv/linux/powerpc/powerpc64/sysdep.h with FCNTL_ADJUST_CMD() macro. Whereas on x86_64 host the values for F_*LK64* are set to 5, 6 and 7 and no adjustments are needed. Since qemu doesn't use the glibc fcntl, but makes the safe_syscall* on its own, the PPC64 qemu is calling the syscall with 12, 13, and 14 (without adjustment) and they all fail. The fcntl calls to F_GETLK/F_SETLK|W all fail for all applications run on PPC64 host user emulation. The fix here could be to see why on PPC64 the glibc is still keeping F_*LK64* different from F_*LK and why adjusting them to 5, 6 and 7 before the syscall for PPC only. 
See if we can make the /usr/include/powerpc64le-linux-gnu/bits/fcntl.h to have the values 5, 6 & 7 just like x86_64 and remove the adjustment code in glibc. That way, qemu sources see the kernel supported values in glibc headers. OR On PPC64 host, qemu sources see both F_*LK & F_*LK64* as same and set to 12, 13 and 14 because __USE_FILE_OFFSET64 is defined in qemu sources(also refer sysdeps/unix/sysv/linux/bits/fcntl-linux.h). Do the value adjustment just like it is done by glibc source by using F_GETLK value of 5. That way, we make the syscalls with the actual supported values in Qemu. The patch is taking this approach. Signed-off-by: Shivaprasad G Bhat Reviewed-by: David Gibson Reviewed-by: Laurent Vivier Message-Id: <153148521235.87746.14142430397318741182.stgit@lep8c.aus.stglabs.ibm.com> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 124 +++++++++++++++++++++++++++---------------- 1 file changed, 79 insertions(+), 45 deletions(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index e4b1b7d7da..b8b7bced9f 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -6545,63 +6545,97 @@ static int do_fork(CPUArchState *env, unsigned int flags, abi_ulong newsp, /* warning : doesn't handle linux specific flags... 
*/ static int target_to_host_fcntl_cmd(int cmd) { + int ret; + switch(cmd) { - case TARGET_F_DUPFD: - case TARGET_F_GETFD: - case TARGET_F_SETFD: - case TARGET_F_GETFL: - case TARGET_F_SETFL: - return cmd; - case TARGET_F_GETLK: - return F_GETLK64; - case TARGET_F_SETLK: - return F_SETLK64; - case TARGET_F_SETLKW: - return F_SETLKW64; - case TARGET_F_GETOWN: - return F_GETOWN; - case TARGET_F_SETOWN: - return F_SETOWN; - case TARGET_F_GETSIG: - return F_GETSIG; - case TARGET_F_SETSIG: - return F_SETSIG; + case TARGET_F_DUPFD: + case TARGET_F_GETFD: + case TARGET_F_SETFD: + case TARGET_F_GETFL: + case TARGET_F_SETFL: + ret = cmd; + break; + case TARGET_F_GETLK: + ret = F_GETLK64; + break; + case TARGET_F_SETLK: + ret = F_SETLK64; + break; + case TARGET_F_SETLKW: + ret = F_SETLKW64; + break; + case TARGET_F_GETOWN: + ret = F_GETOWN; + break; + case TARGET_F_SETOWN: + ret = F_SETOWN; + break; + case TARGET_F_GETSIG: + ret = F_GETSIG; + break; + case TARGET_F_SETSIG: + ret = F_SETSIG; + break; #if TARGET_ABI_BITS == 32 - case TARGET_F_GETLK64: - return F_GETLK64; - case TARGET_F_SETLK64: - return F_SETLK64; - case TARGET_F_SETLKW64: - return F_SETLKW64; + case TARGET_F_GETLK64: + ret = F_GETLK64; + break; + case TARGET_F_SETLK64: + ret = F_SETLK64; + break; + case TARGET_F_SETLKW64: + ret = F_SETLKW64; + break; #endif - case TARGET_F_SETLEASE: - return F_SETLEASE; - case TARGET_F_GETLEASE: - return F_GETLEASE; + case TARGET_F_SETLEASE: + ret = F_SETLEASE; + break; + case TARGET_F_GETLEASE: + ret = F_GETLEASE; + break; #ifdef F_DUPFD_CLOEXEC - case TARGET_F_DUPFD_CLOEXEC: - return F_DUPFD_CLOEXEC; + case TARGET_F_DUPFD_CLOEXEC: + ret = F_DUPFD_CLOEXEC; + break; #endif - case TARGET_F_NOTIFY: - return F_NOTIFY; + case TARGET_F_NOTIFY: + ret = F_NOTIFY; + break; #ifdef F_GETOWN_EX - case TARGET_F_GETOWN_EX: - return F_GETOWN_EX; + case TARGET_F_GETOWN_EX: + ret = F_GETOWN_EX; + break; #endif #ifdef F_SETOWN_EX - case TARGET_F_SETOWN_EX: - return F_SETOWN_EX; + case 
TARGET_F_SETOWN_EX: + ret = F_SETOWN_EX; + break; #endif #ifdef F_SETPIPE_SZ - case TARGET_F_SETPIPE_SZ: - return F_SETPIPE_SZ; - case TARGET_F_GETPIPE_SZ: - return F_GETPIPE_SZ; + case TARGET_F_SETPIPE_SZ: + ret = F_SETPIPE_SZ; + break; + case TARGET_F_GETPIPE_SZ: + ret = F_GETPIPE_SZ; + break; #endif - default: - return -TARGET_EINVAL; + default: + ret = -TARGET_EINVAL; + break; } - return -TARGET_EINVAL; + +#if defined(__powerpc64__) + /* On PPC64, glibc headers has the F_*LK* defined to 12, 13 and 14 and + * is not supported by kernel. The glibc fcntl call actually adjusts + * them to 5, 6 and 7 before making the syscall(). Since we make the + * syscall directly, adjust to what is supported by the kernel. + */ + if (ret >= F_GETLK64 && ret <= F_SETLKW64) { + ret -= F_GETLK64 - 5; + } +#endif + + return ret; } #define FLOCK_TRANSTBL \ From af8ab2bf223e1e4406108708b47476ef0d148a52 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Fri, 13 Jul 2018 14:58:05 +0200 Subject: [PATCH 2/4] linux-user: convert remaining fcntl() to safe_fcntl() Commit 435da5e709 didn't convert a fcntl() call to safe_fcntl() for TARGET_NR_fcntl64 case. There is no reason to not use it in this case. 
Fixes: 435da5e709 linux-user: Use safe_syscall wrapper for fcntl Signed-off-by: Laurent Vivier Message-Id: <20180713125805.10749-1-laurent@vivier.eu> --- linux-user/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index b8b7bced9f..aa4f3eb1c8 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -11764,7 +11764,7 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, if (ret) { break; } - ret = get_errno(fcntl(arg1, cmd, &fl)); + ret = get_errno(safe_fcntl(arg1, cmd, &fl)); if (ret == 0) { ret = copyto(arg3, &fl); } From dc18baaef36d95e5a7b4fbc60daff3d92afe737c Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sat, 14 Jul 2018 21:35:53 +0200 Subject: [PATCH 3/4] linux-user: fix mmap_find_vma_reserved() The value given by mmap_find_vma_reserved() is used with mmap(), so it is needed to be aligned with the host page size. Since commit 18e80c55bb, reserved_va is only aligned to TARGET_PAGE_SIZE, and it works well if this size is greater or equal to the host page size. But ppc64 hosts have 64kB page size and when we start a 4kiB page size guest (like i386), it fails when it tries to mmap the stack: mmap stack: Invalid argument Fixes: 18e80c55bb (linux-user: Tidy and enforce reserved_va initialization) Signed-off-by: Laurent Vivier Reviewed-by: Richard Henderson Message-Id: <20180714193553.30846-1-laurent@vivier.eu> --- linux-user/main.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/linux-user/main.c b/linux-user/main.c index 52b5a618fe..ea00dd9057 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -78,14 +78,7 @@ int have_guest_base; # endif #endif -/* That said, reserving *too* much vm space via mmap can run into problems - with rlimits, oom due to page table creation, etc. We will still try it, - if directed by the command-line option, but not by default. 
*/ -#if HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32 -unsigned long reserved_va = MAX_RESERVED_VA; -#else unsigned long reserved_va; -#endif static void usage(int exitcode); @@ -672,6 +665,18 @@ int main(int argc, char **argv, char **envp) /* init tcg before creating CPUs and to get qemu_host_page_size */ tcg_exec_init(0); + /* Reserving *too* much vm space via mmap can run into problems + with rlimits, oom due to page table creation, etc. We will still try it, + if directed by the command-line option, but not by default. */ + if (HOST_LONG_BITS == 64 && + TARGET_VIRT_ADDR_SPACE_BITS <= 32 && + reserved_va == 0) { + /* reserved_va must be aligned with the host page size + * as it is used with mmap() + */ + reserved_va = MAX_RESERVED_VA & qemu_host_page_mask; + } + cpu = cpu_create(cpu_type); env = cpu->env_ptr; cpu_reset(cpu); From 1d3d1b23e1c8f52ec431ddaa8deea1322bc25cbf Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Thu, 12 Jul 2018 00:12:44 +0200 Subject: [PATCH 4/4] Zero out the host's `msg_control` buffer If this is not done, qemu would drop any control message after the first one. This is because glibc's `CMSG_NXTHDR` macro accesses the uninitialized cmsghdr's length field in order to find out if the message fits into the `msg_control` buffer, wrongly assuming that it doesn't because the length field contains garbage. Accessing the length field is fine for completed messages we receive from the kernel, but is - as far as I know - not needed since the kernel won't return such an invalid cmsghdr in the first place. This is tracked as this glibc bug: https://sourceware.org/bugzilla/show_bug.cgi?id=13500 It's probably also a good idea to bail with an error if `CMSG_NXTHDR` returns NULL but `TARGET_CMSG_NXTHDR` doesn't (ie. we still expect cmsgs). 
Signed-off-by: Jonas Schievink Reviewed-by: Laurent Vivier Message-Id: <20180711221244.31869-1-jonasschievink@gmail.com> Signed-off-by: Laurent Vivier --- linux-user/syscall.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index aa4f3eb1c8..3df3bdffb2 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -3843,6 +3843,8 @@ static abi_long do_sendrecvmsg_locked(int fd, struct target_msghdr *msgp, } msg.msg_controllen = 2 * tswapal(msgp->msg_controllen); msg.msg_control = alloca(msg.msg_controllen); + memset(msg.msg_control, 0, msg.msg_controllen); + msg.msg_flags = tswap32(msgp->msg_flags); count = tswapal(msgp->msg_iovlen);