qemu/include/exec/cpu_ldst.h
Emilio G. Cota 403f290c06 cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).

This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.

1. aarch64 bootup+shutdown test:

- Before:
 Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):

       7487.087786      task-clock (msec)         #    0.998 CPUs utilized            ( +-  0.12% )
    31,574,905,303      cycles                    #    4.217 GHz                      ( +-  0.12% )
    57,097,908,812      instructions              #    1.81  insns per cycle          ( +-  0.08% )
    10,255,415,367      branches                  # 1369.747 M/sec                    ( +-  0.08% )
       173,278,962      branch-misses             #    1.69% of all branches          ( +-  0.18% )

       7.504481349 seconds time elapsed                                          ( +-  0.14% )

- After:
 Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):

       7462.441328      task-clock (msec)         #    0.998 CPUs utilized            ( +-  0.07% )
    31,478,476,520      cycles                    #    4.218 GHz                      ( +-  0.07% )
    57,017,330,084      instructions              #    1.81  insns per cycle          ( +-  0.05% )
    10,251,929,667      branches                  # 1373.804 M/sec                    ( +-  0.05% )
       173,023,787      branch-misses             #    1.69% of all branches          ( +-  0.11% )

       7.474970463 seconds time elapsed                                          ( +-  0.07% )

2. SPEC06int:
                                              SPEC06int (test set)
                                           [Y axis: Speedup over master]
  1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
       |                                                                                                  |
   1.1 +-+.................................+++.............................+  tlb-lock-v2 (m+++x)       +-+
       |                                +++ |                   +++        tlb-lock-v3 (spinl|ck)         |
       |                    +++          |  |     +++    +++     |                           |            |
  1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
       |      ###         ++#| #         |# |# ***### +++### +++#+#     |     +++     |     #|#    ###    |
     1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
       |    *+* #    #++# ***  #   #### ***  # * *++# ****+# *| * # ****|#   |# #    #|#    #+#    # #    |
  0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
       |    * * #    #  # *|*  #   #  # *|*  # * *  # *++* # *  * # *  * # * |* #  ++# #    # #  *** #    |
       |    * * #  ++#  # *+*  #   #  # *|*  # * *  # *  * # *  * # *  * # *++* # **** #  ++# #  * * #    |
   0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
       |    * * #  ***  # * *  #  |#  # *+*  # * *  # *  * # *  * # *  * # *  * # *++* #   |# #  * * #    |
  0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
       |    * * #  *+*  # * *  # *|*  # * *  # * *  # *  * # *  * # *  * # *  * # *  * # * |* #  * * #    |
       |    * * #  * *  # * *  # *+*  # * *  # * *  # *  * # *  * # *  * # *  * # *  * # * |* #  * * #    |
   0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
       |    * * #  * *  # * *  # * *  # * *  # * *  # *  * # *  * # *  * # *  * # *  * # *  * #  * * #    |
  0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
 400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean

  png: https://imgur.com/a/BHzpPTW

Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
  a spinlock and a single lock acquisition in tlb_set_page_with_attrs.

Signed-off-by: Emilio G. Cota <cota@braap.org>
Message-Id: <20181016153840.25877-1-cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
2018-10-18 19:46:53 -07:00

476 lines
11 KiB
C

/*
* Software MMU support
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*
*/
/*
* Generate inline load/store functions for all MMU modes (typically
* at least _user and _kernel) as well as _data versions, for all data
* sizes.
*
* Used by target op helpers.
*
* The syntax for the accessors is:
*
* load: cpu_ld{sign}{size}_{mmusuffix}(env, ptr)
*
* store: cpu_st{sign}{size}_{mmusuffix}(env, ptr, val)
*
* sign is:
* (empty): for 32 and 64 bit sizes
* u : unsigned
* s : signed
*
* size is:
* b: 8 bits
* w: 16 bits
* l: 32 bits
* q: 64 bits
*
* mmusuffix is one of the generic suffixes "data" or "code", or
* (for softmmu configs) a target-specific MMU mode suffix as defined
* in target cpu.h.
*/
#ifndef CPU_LDST_H
#define CPU_LDST_H
#if defined(CONFIG_USER_ONLY)
/* sparc32plus has 64bit long but 32bit space address
* this can make bad result with g2h() and h2g()
*/
#if TARGET_VIRT_ADDR_SPACE_BITS <= 32
typedef uint32_t abi_ptr;
#define TARGET_ABI_FMT_ptr "%x"
#else
typedef uint64_t abi_ptr;
#define TARGET_ABI_FMT_ptr "%"PRIx64
#endif
/* All direct uses of g2h and h2g need to go away for usermode softmmu. */
#define g2h(x) ((void *)((unsigned long)(abi_ptr)(x) + guest_base))
#define guest_addr_valid(x) ((x) <= GUEST_ADDR_MAX)
#define h2g_valid(x) guest_addr_valid((unsigned long)(x) - guest_base)
static inline int guest_range_valid(unsigned long start, unsigned long len)
{
return len - 1 <= GUEST_ADDR_MAX && start <= GUEST_ADDR_MAX - len + 1;
}
#define h2g_nocheck(x) ({ \
unsigned long __ret = (unsigned long)(x) - guest_base; \
(abi_ptr)__ret; \
})
#define h2g(x) ({ \
/* Check if given address fits target address space */ \
assert(h2g_valid(x)); \
h2g_nocheck(x); \
})
#else
typedef target_ulong abi_ptr;
#define TARGET_ABI_FMT_ptr TARGET_ABI_FMT_lx
#endif
#if defined(CONFIG_USER_ONLY)
extern __thread uintptr_t helper_retaddr;
/* In user-only mode we provide only the _code and _data accessors. */
#define MEMSUFFIX _data
#define DATA_SIZE 1
#include "exec/cpu_ldst_useronly_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_useronly_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_useronly_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_useronly_template.h"
#undef MEMSUFFIX
#define MEMSUFFIX _code
#define CODE_ACCESS
#define DATA_SIZE 1
#include "exec/cpu_ldst_useronly_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_useronly_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_useronly_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_useronly_template.h"
#undef MEMSUFFIX
#undef CODE_ACCESS
#else
/* The memory helpers for tcg-generated code need tcg_target_long etc. */
#include "tcg.h"
static inline target_ulong tlb_addr_write(const CPUTLBEntry *entry)
{
#if TCG_OVERSIZED_GUEST
return entry->addr_write;
#else
return atomic_read(&entry->addr_write);
#endif
}
/* Find the TLB index corresponding to the mmu_idx + address pair. */
static inline uintptr_t tlb_index(CPUArchState *env, uintptr_t mmu_idx,
target_ulong addr)
{
return (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
}
/* Find the TLB entry corresponding to the mmu_idx + address pair. */
static inline CPUTLBEntry *tlb_entry(CPUArchState *env, uintptr_t mmu_idx,
target_ulong addr)
{
return &env->tlb_table[mmu_idx][tlb_index(env, mmu_idx, addr)];
}
#ifdef MMU_MODE0_SUFFIX
#define CPU_MMU_INDEX 0
#define MEMSUFFIX MMU_MODE0_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif
#if (NB_MMU_MODES >= 2) && defined(MMU_MODE1_SUFFIX)
#define CPU_MMU_INDEX 1
#define MEMSUFFIX MMU_MODE1_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif
#if (NB_MMU_MODES >= 3) && defined(MMU_MODE2_SUFFIX)
#define CPU_MMU_INDEX 2
#define MEMSUFFIX MMU_MODE2_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 3) */
#if (NB_MMU_MODES >= 4) && defined(MMU_MODE3_SUFFIX)
#define CPU_MMU_INDEX 3
#define MEMSUFFIX MMU_MODE3_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 4) */
#if (NB_MMU_MODES >= 5) && defined(MMU_MODE4_SUFFIX)
#define CPU_MMU_INDEX 4
#define MEMSUFFIX MMU_MODE4_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 5) */
#if (NB_MMU_MODES >= 6) && defined(MMU_MODE5_SUFFIX)
#define CPU_MMU_INDEX 5
#define MEMSUFFIX MMU_MODE5_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 6) */
#if (NB_MMU_MODES >= 7) && defined(MMU_MODE6_SUFFIX)
#define CPU_MMU_INDEX 6
#define MEMSUFFIX MMU_MODE6_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 7) */
#if (NB_MMU_MODES >= 8) && defined(MMU_MODE7_SUFFIX)
#define CPU_MMU_INDEX 7
#define MEMSUFFIX MMU_MODE7_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 8) */
#if (NB_MMU_MODES >= 9) && defined(MMU_MODE8_SUFFIX)
#define CPU_MMU_INDEX 8
#define MEMSUFFIX MMU_MODE8_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 9) */
#if (NB_MMU_MODES >= 10) && defined(MMU_MODE9_SUFFIX)
#define CPU_MMU_INDEX 9
#define MEMSUFFIX MMU_MODE9_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 10) */
#if (NB_MMU_MODES >= 11) && defined(MMU_MODE10_SUFFIX)
#define CPU_MMU_INDEX 10
#define MEMSUFFIX MMU_MODE10_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 11) */
#if (NB_MMU_MODES >= 12) && defined(MMU_MODE11_SUFFIX)
#define CPU_MMU_INDEX 11
#define MEMSUFFIX MMU_MODE11_SUFFIX
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#endif /* (NB_MMU_MODES >= 12) */
#if (NB_MMU_MODES > 12)
#error "NB_MMU_MODES > 12 is not supported for now"
#endif /* (NB_MMU_MODES > 12) */
/* these access are slower, they must be as rare as possible */
#define CPU_MMU_INDEX (cpu_mmu_index(env, false))
#define MEMSUFFIX _data
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#define CPU_MMU_INDEX (cpu_mmu_index(env, true))
#define MEMSUFFIX _code
#define SOFTMMU_CODE_ACCESS
#define DATA_SIZE 1
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 2
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 4
#include "exec/cpu_ldst_template.h"
#define DATA_SIZE 8
#include "exec/cpu_ldst_template.h"
#undef CPU_MMU_INDEX
#undef MEMSUFFIX
#undef SOFTMMU_CODE_ACCESS
#endif /* defined(CONFIG_USER_ONLY) */
/**
* tlb_vaddr_to_host:
* @env: CPUArchState
* @addr: guest virtual address to look up
* @access_type: 0 for read, 1 for write, 2 for execute
* @mmu_idx: MMU index to use for lookup
*
* Look up the specified guest virtual index in the TCG softmmu TLB.
* If the TLB contains a host virtual address suitable for direct RAM
* access, then return it. Otherwise (TLB miss, TLB entry is for an
* I/O access, etc) return NULL.
*
* This is the equivalent of the initial fast-path code used by
* TCG backends for guest load and store accesses.
*/
static inline void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
int access_type, int mmu_idx)
{
#if defined(CONFIG_USER_ONLY)
return g2h(addr);
#else
CPUTLBEntry *tlbentry = tlb_entry(env, mmu_idx, addr);
abi_ptr tlb_addr;
uintptr_t haddr;
switch (access_type) {
case 0:
tlb_addr = tlbentry->addr_read;
break;
case 1:
tlb_addr = tlb_addr_write(tlbentry);
break;
case 2:
tlb_addr = tlbentry->addr_code;
break;
default:
g_assert_not_reached();
}
if (!tlb_hit(tlb_addr, addr)) {
/* TLB entry is for a different page */
return NULL;
}
if (tlb_addr & ~TARGET_PAGE_MASK) {
/* IO access */
return NULL;
}
haddr = addr + tlbentry->addend;
return (void *)haddr;
#endif /* defined(CONFIG_USER_ONLY) */
}
#endif /* CPU_LDST_H */