367189efae
We weren't using cs_base in the hash calculations before. Since the
arm front end moved a chunk of flags into cs_base in a378206a20
(target/arm: Move mode specific TB flags to tb->cs_base), those flags
make up an important part of the execution state.
Widen the tb_hash_func to include cs_base and expand to qemu_xxhash8()
to accommodate it.
My initial benchmark shows very little difference in the
runtime.
Before:
armhf
➜ hyperfine -w 2 -m 20 "./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf -device scsi-hd,drive=hd -smp 4 -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot"
Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf -device scsi-hd,drive=hd -smp 4 -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot
Time (mean ± σ): 24.627 s ± 2.708 s [User: 34.309 s, System: 1.797 s]
Range (min … max): 22.345 s … 29.864 s 20 runs
arm64
➜ hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22,hostfwd=tcp::1234-:1234 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64 -device scsi-hd,drive=hd -smp 4 -kernel ~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot"
Benchmark 1: 20
Time (mean ± σ): 62.559 s ± 2.917 s [User: 189.115 s, System: 4.089 s]
Range (min … max): 59.997 s … 70.153 s 10 runs
After:
armhf
Benchmark 1: ./arm-softmmu/qemu-system-arm -cpu cortex-a15 -machine type=virt,highmem=off -display none -m 2048 -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-armhf -device scsi-hd,drive=hd -smp 4 -kernel /home/alex/lsrc/linux.git/builds/arm/arch/arm/boot/zImage -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark.service' -snapshot
Time (mean ± σ): 24.223 s ± 2.151 s [User: 34.284 s, System: 1.906 s]
Range (min … max): 22.000 s … 28.476 s 20 runs
arm64
hyperfine -w 2 -n 20 "./qemu-system-aarch64 -cpu max,pauth-impdef=on -machine type=virt,virtualization=on,gic-version=3 -display none -serial mon:stdio -netdev user,id=unet,hostfwd=tcp::2222-:22,hostfwd=tcp::1234-:1234 -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -blockdev driver=raw,node-name=hd,discard=unmap,file.driver=host_device,file.filename=/dev/zen-disk/debian-bullseye-arm64 -device scsi-hd,drive=hd -smp 4 -kernel ~/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image.gz -append 'console=ttyAMA0 root=/dev/sda2 systemd.unit=benchmark-pigz.service' -snapshot"
Benchmark 1: 20
Time (mean ± σ): 62.769 s ± 1.978 s [User: 188.431 s, System: 5.269 s]
Range (min … max): 60.285 s … 66.868 s 10 runs
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20230526165401.574474-12-alex.bennee@linaro.org
Message-Id: <20230524133952.3971948-11-alex.bennee@linaro.org>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
71 lines
2.3 KiB
C
71 lines
2.3 KiB
C
/*
|
|
* internal execution defines for qemu
|
|
*
|
|
* Copyright (c) 2003 Fabrice Bellard
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#ifndef EXEC_TB_HASH_H
|
|
#define EXEC_TB_HASH_H
|
|
|
|
#include "exec/cpu-defs.h"
|
|
#include "exec/exec-all.h"
|
|
#include "qemu/xxhash.h"
|
|
#include "tb-jmp-cache.h"
|
|
|
|
#ifdef CONFIG_SOFTMMU
|
|
|
|
/*
 * Layout of a jump cache hash value.
 *
 * For addresses on the same page only the bottom TB_JMP_PAGE_BITS of the
 * hash vary; the bits above them are identical.  TLB invalidation relies
 * on this to clear just a subset of the hash table quickly.
 */

/* Number of low hash bits that vary within a page: half of the hash width. */
#define TB_JMP_PAGE_BITS (TB_JMP_CACHE_BITS / 2)
/* Number of distinct values representable in those low bits. */
#define TB_JMP_PAGE_SIZE (1 << TB_JMP_PAGE_BITS)
/* Mask selecting the low (within-page) hash bits. */
#define TB_JMP_ADDR_MASK (TB_JMP_PAGE_SIZE - 1)
/*
 * Mask selecting the remaining (per-page) hash bits.
 * NOTE(review): assumes TB_JMP_CACHE_SIZE is 1 << TB_JMP_CACHE_BITS;
 * it is defined outside this header (see tb-jmp-cache.h) - confirm there.
 */
#define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
|
|
|
|
/*
 * Compute the page portion of the jump cache hash for @pc.
 *
 * @pc is XOR-folded with itself shifted right by
 * (TARGET_PAGE_BITS - TB_JMP_PAGE_BITS), shifted right by the same amount
 * again, and masked with TB_JMP_PAGE_MASK.  The low TB_JMP_ADDR_MASK bits
 * are not included in the result.
 */
static inline unsigned int tb_jmp_cache_hash_page(target_ulong pc)
{
    const int shift = TARGET_PAGE_BITS - TB_JMP_PAGE_BITS;
    const target_ulong folded = pc ^ (pc >> shift);

    return (folded >> shift) & TB_JMP_PAGE_MASK;
}
|
|
|
|
/*
 * Compute the full jump cache hash for @pc (CONFIG_SOFTMMU variant).
 *
 * The result is the OR of two disjointly masked pieces of the same folded
 * value: the page part (identical to what tb_jmp_cache_hash_page() returns
 * for the same @pc) and the low TB_JMP_ADDR_MASK bits of the folded value.
 */
static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
{
    const int shift = TARGET_PAGE_BITS - TB_JMP_PAGE_BITS;
    const target_ulong folded = pc ^ (pc >> shift);
    const target_ulong page_part = (folded >> shift) & TB_JMP_PAGE_MASK;
    const target_ulong addr_part = folded & TB_JMP_ADDR_MASK;

    return page_part | addr_part;
}
|
|
|
|
#else
|
|
|
|
/* In user-mode we can get better hashing because we do not have a TLB */
|
|
static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
|
|
{
|
|
return (pc ^ (pc >> TB_JMP_CACHE_BITS)) & (TB_JMP_CACHE_SIZE - 1);
|
|
}
|
|
|
|
#endif /* CONFIG_SOFTMMU */
|
|
|
|
static inline
|
|
uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
|
|
uint32_t flags, uint64_t flags2, uint32_t cf_mask)
|
|
{
|
|
return qemu_xxhash8(phys_pc, pc, flags2, flags, cf_mask);
|
|
}
|
|
|
|
#endif
|