qemu/target/riscv/translate.c

645 lines
18 KiB
C
Raw Normal View History

/*
* RISC-V emulation for qemu: main translation routines.
*
* Copyright (c) 2016-2017 Sagar Karandikar, sagark@eecs.berkeley.edu
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2 or later, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
#include "tcg/tcg-op.h"
#include "disas/disas.h"
#include "exec/cpu_ldst.h"
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
#include "exec/helper-gen.h"
#include "exec/translator.h"
#include "exec/log.h"
#include "instmap.h"
/* global register indices */
/*
 * TCG handles for the integer registers, the program counter and the
 * vector length.  They are bound to CPURISCVState fields in
 * riscv_translate_init(); cpu_gpr[0] is left NULL there.
 */
static TCGv cpu_gpr[32], cpu_pc, cpu_vl;
/* TCG handles for the floating-point registers, always 64 bits wide. */
static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
/* TCG handles bound to CPURISCVState.load_res and CPURISCVState.load_val. */
static TCGv load_res;
static TCGv load_val;
#include "exec/gen-icount.h"
/*
* If an operation is being performed on less than TARGET_LONG_BITS,
* it may require the inputs to be sign- or zero-extended; which will
* depend on the exact operation being performed.
*
* get_gpr() only honours the request while ctx->w is set; otherwise the
* register is returned as is.
*/
typedef enum {
/* Use the register value unchanged. */
EXT_NONE,
/* Sign-extend the low 32 bits of the register into a temporary. */
EXT_SIGN,
/* Zero-extend the low 32 bits of the register into a temporary. */
EXT_ZERO,
} DisasExtend;
/*
 * Per-translation-block state of the RISC-V front end.  It is filled in by
 * riscv_tr_init_disas_context() and embeds the generic DisasContextBase so
 * that the translator hooks can recover it with container_of().
 */
typedef struct DisasContext {
DisasContextBase base;
/* pc_succ_insn points to the instruction following base.pc_next */
target_ulong pc_succ_insn;
/* Copy of env->priv_ver taken when translation of the TB starts. */
target_ulong priv_ver;
/* Copy of env->misa; has_ext() and is_32bit() test bits of it. */
target_ulong misa;
uint32_t opcode;
/* TB_FLAGS_MSTATUS_FS bits of the TB flags; updated by mark_fs_dirty(). */
uint32_t mstatus_fs;
/* MSTATUS_HS_FS field of the TB flags; updated by mark_fs_dirty(). */
uint32_t mstatus_hs_fs;
/* TB_FLAGS_MMU_MASK bits of the TB flags. */
uint32_t mem_idx;
/* Remember the rounding mode encoded in the previous fp instruction,
which we have already installed into env->fp_status. Or -1 for
no previous fp instruction. Note that we exit the TB when writing
to any system register, which includes CSR_FRM, so we do not have
to reset this known value. */
int frm;
/*
 * When set, oper_len() reports 32 and gen_set_gpr() sign-extends the result
 * from 32 bits.  Cleared again after every translated instruction.
 */
bool w;
bool virt_enabled;
/* Copy of cpu->cfg.ext_ifencei. */
bool ext_ifencei;
/* HLSX field of the TB flags. */
bool hlsx;
/* vector extension */
/* VILL, LMUL and SEW fields of the TB flags. */
bool vill;
uint8_t lmul;
uint8_t sew;
/* Copy of cpu->cfg.vlen. */
uint16_t vlen;
/* Computed as 1 << (sew + 3 - lmul). */
uint16_t mlen;
/* VL_EQ_VLMAX field of the TB flags. */
bool vl_eq_vlmax;
/* Number of entries of temp[] currently handed out by temp_new(). */
uint8_t ntemp;
/* CPU being translated; used by opcode_at() to reach the env pointer. */
CPUState *cs;
/* Constant zero, returned by get_gpr() for register 0. */
TCGv zero;
/* Space for 3 operands plus 1 extra for address computation. */
TCGv temp[4];
} DisasContext;
/* Report whether any MISA bit selected by @ext is set for this context. */
static inline bool has_ext(DisasContext *ctx, uint32_t ext)
{
    return (ctx->misa & ext) != 0;
}
/*
 * is_32bit(ctx): true when translating for a 32-bit CPU.
 *
 * With TARGET_RISCV32 this is the constant true; otherwise, in
 * CONFIG_USER_ONLY builds it is the constant false.  In the remaining
 * configuration it is decided per context from the RV32 bits of misa.
 */
#ifdef TARGET_RISCV32
# define is_32bit(ctx) true
#elif defined(CONFIG_USER_ONLY)
# define is_32bit(ctx) false
#else
static inline bool is_32bit(DisasContext *ctx)
{
return (ctx->misa & RV32) == RV32;
}
#endif
/*
 * Width in bits of the operation being translated: 32 while ctx->w is
 * set, TARGET_LONG_BITS otherwise.
 */
static inline int oper_len(DisasContext *ctx)
{
    if (ctx->w) {
        return 32;
    }
    return TARGET_LONG_BITS;
}
/*
 * NaN-box a single-precision value held in a 64-bit FP register.
 *
 * RISC-V requires narrower floating point values to be NaN-boxed when they
 * are written to a wider register, i.e. the upper 32 bits must all be 1.
 * For consistency and simplicity the result is boxed even when the RVD
 * extension is not present.
 */
static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
{
    const uint64_t upper_ones = MAKE_64BIT_MASK(32, 32);

    tcg_gen_ori_i64(out, in, upper_ones);
}
/*
 * Validate the NaN-boxing of a single-precision input.
 *
 * A narrow n-bit operation, where n < FLEN, requires all upper FLEN - n
 * bits of its inputs to be 1.  A correctly boxed input is passed through
 * unchanged; anything else is replaced by the n-bit canonical NaN
 * (v2.2 section 9.2).
 *
 * The value produced here is always NaN-boxed, including the canonical NaN.
 */
static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
{
    /* Smallest unsigned value whose upper 32 bits are all ones. */
    TCGv_i64 boxed_min = tcg_constant_i64(0xffffffff00000000ull);
    /* Boxed single-precision canonical NaN. */
    TCGv_i64 boxed_nan = tcg_constant_i64(0xffffffff7fc00000ull);

    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, boxed_min, in, boxed_nan);
}
/*
 * Emit code that raises exception @excp at the current instruction.
 *
 * cpu_pc is first set to base.pc_next, then the raise_exception helper is
 * called with @excp.  The TB is marked DISAS_NORETURN afterwards.
 */
static void generate_exception(DisasContext *ctx, int excp)
{
tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
ctx->base.is_jmp = DISAS_NORETURN;
}
/*
 * Same as generate_exception(), but additionally stores the pc value just
 * written to cpu_pc into CPURISCVState.badaddr before calling the helper.
 */
static void generate_exception_mtval(DisasContext *ctx, int excp)
{
tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
/* Record the faulting pc in badaddr. */
tcg_gen_st_tl(cpu_pc, cpu_env, offsetof(CPURISCVState, badaddr));
gen_helper_raise_exception(cpu_env, tcg_constant_i32(excp));
ctx->base.is_jmp = DISAS_NORETURN;
}
/* Emit code raising RISCV_EXCP_ILLEGAL_INST at the current instruction. */
static void gen_exception_illegal(DisasContext *ctx)
{
generate_exception(ctx, RISCV_EXCP_ILLEGAL_INST);
}
/*
 * Emit code raising RISCV_EXCP_INST_ADDR_MIS at the current instruction,
 * recording the pc in badaddr via generate_exception_mtval().
 */
static void gen_exception_inst_addr_mis(DisasContext *ctx)
{
generate_exception_mtval(ctx, RISCV_EXCP_INST_ADDR_MIS);
}
/*
 * Emit the end-of-block jump to @dest.
 *
 * If translator_use_goto_tb() accepts @dest, a goto_tb/exit_tb pair is
 * emitted for jump slot @n; otherwise the pc is updated and control
 * continues through tcg_gen_lookup_and_goto_ptr().
 */
static void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
{
    if (!translator_use_goto_tb(&ctx->base, dest)) {
        tcg_gen_movi_tl(cpu_pc, dest);
        tcg_gen_lookup_and_goto_ptr();
        return;
    }

    tcg_gen_goto_tb(n);
    tcg_gen_movi_tl(cpu_pc, dest);
    tcg_gen_exit_tb(ctx->base.tb, n);
}
/*
* Wrappers for getting reg values.
*
* The $zero register does not have cpu_gpr[0] allocated -- we supply the
* constant zero as a source, and an uninitialized sink as destination.
*
* Further, we may provide an extension for word operations.
*/
/*
 * Allocate a TCG temporary and record it in ctx->temp[], from where it is
 * freed once the current instruction has been translated.  Asserts that a
 * free slot is still available.
 */
static TCGv temp_new(DisasContext *ctx)
{
    TCGv fresh;

    assert(ctx->ntemp < ARRAY_SIZE(ctx->temp));
    fresh = tcg_temp_new();
    ctx->temp[ctx->ntemp] = fresh;
    ctx->ntemp++;
    return fresh;
}
/*
 * Return a TCG value holding integer register @reg_num as a source operand.
 *
 * Register 0 yields the constant zero.  For word operations (ctx->w set)
 * the value is sign- or zero-extended from 32 bits into a per-insn
 * temporary as requested by @ext; otherwise the register global itself is
 * returned.
 */
static TCGv get_gpr(DisasContext *ctx, int reg_num, DisasExtend ext)
{
    TCGv widened;

    if (reg_num == 0) {
        return ctx->zero;
    }
    if (!ctx->w || ext == EXT_NONE) {
        return cpu_gpr[reg_num];
    }
    if (ext == EXT_SIGN) {
        widened = temp_new(ctx);
        tcg_gen_ext32s_tl(widened, cpu_gpr[reg_num]);
        return widened;
    }
    if (ext == EXT_ZERO) {
        widened = temp_new(ctx);
        tcg_gen_ext32u_tl(widened, cpu_gpr[reg_num]);
        return widened;
    }
    g_assert_not_reached();
}
/*
 * Return the TCG value an operation should write its result for register
 * @reg_num into.  The register global is used directly unless the target
 * is register 0 or a word operation is in progress, in which case a
 * per-insn temporary is handed out instead.
 */
static TCGv dest_gpr(DisasContext *ctx, int reg_num)
{
    if (reg_num != 0 && !ctx->w) {
        return cpu_gpr[reg_num];
    }
    return temp_new(ctx);
}
/*
 * Commit @t to integer register @reg_num.  Writes to register 0 are
 * dropped; for word operations the value is sign-extended from 32 bits.
 */
static void gen_set_gpr(DisasContext *ctx, int reg_num, TCGv t)
{
    if (reg_num == 0) {
        return;
    }
    if (!ctx->w) {
        tcg_gen_mov_tl(cpu_gpr[reg_num], t);
        return;
    }
    tcg_gen_ext32s_tl(cpu_gpr[reg_num], t);
}
/*
 * Translate a jump-and-link to pc + @imm, writing the address of the
 * following instruction to register @rd (unless @rd is 0).
 *
 * Without the C extension a target that is not 4-byte aligned raises an
 * instruction-address-misaligned exception instead.
 */
static void gen_jal(DisasContext *ctx, int rd, target_ulong imm)
{
    const target_ulong target = ctx->base.pc_next + imm;

    /* check misaligned: */
    if (!has_ext(ctx, RVC) && (target & 0x3) != 0) {
        gen_exception_inst_addr_mis(ctx);
        return;
    }

    if (rd != 0) {
        tcg_gen_movi_tl(cpu_gpr[rd], ctx->pc_succ_insn);
    }

    gen_goto_tb(ctx, 0, target); /* must use this for safety */
    ctx->base.is_jmp = DISAS_NORETURN;
}
#ifndef CONFIG_USER_ONLY
/*
 * The states of mstatus_fs are:
 *   0 = disabled, 1 = initial, 2 = clean, 3 = dirty
 * The disabled state has already been diagnosed by the time this is
 * called, so all that is left is turning initial/clean into dirty.
 *
 * The same is done for mstatus_hs when virtualisation is enabled.
 */
static void mark_fs_dirty(DisasContext *ctx)
{
    target_ulong sd = is_32bit(ctx) ? MSTATUS32_SD : MSTATUS64_SD;

    if (ctx->mstatus_fs != MSTATUS_FS) {
        TCGv status = tcg_temp_new();

        /* Remember the state change for the rest of the TB. */
        ctx->mstatus_fs = MSTATUS_FS;

        tcg_gen_ld_tl(status, cpu_env, offsetof(CPURISCVState, mstatus));
        tcg_gen_ori_tl(status, status, MSTATUS_FS | sd);
        tcg_gen_st_tl(status, cpu_env, offsetof(CPURISCVState, mstatus));
        tcg_temp_free(status);
    }

    if (ctx->virt_enabled && ctx->mstatus_hs_fs != MSTATUS_FS) {
        TCGv status_hs = tcg_temp_new();

        /* Remember the state change for the rest of the TB. */
        ctx->mstatus_hs_fs = MSTATUS_FS;

        tcg_gen_ld_tl(status_hs, cpu_env,
                      offsetof(CPURISCVState, mstatus_hs));
        tcg_gen_ori_tl(status_hs, status_hs, MSTATUS_FS | sd);
        tcg_gen_st_tl(status_hs, cpu_env,
                      offsetof(CPURISCVState, mstatus_hs));
        tcg_temp_free(status_hs);
    }
}
#else
/* Nothing to track in user-only builds. */
static inline void mark_fs_dirty(DisasContext *ctx) { }
#endif
/*
 * Install rounding mode @rm through the set_rounding_mode helper.  The
 * helper call is skipped when @rm is already the mode cached in ctx->frm.
 */
static void gen_set_rm(DisasContext *ctx, int rm)
{
    if (ctx->frm != rm) {
        ctx->frm = rm;
        gen_helper_set_rounding_mode(cpu_env, tcg_constant_i32(rm));
    }
}
/*
 * Field transform returning @nf + 1; @ctx is unused.
 * NOTE(review): presumably referenced by the generated decoder -- confirm.
 */
static int ex_plus_1(DisasContext *ctx, int nf)
{
return nf + 1;
}
/*
 * EX_SH(amount) defines ex_shift_<amount>(), a field transform that returns
 * its immediate shifted left by <amount> bits.  The context argument is
 * unused.
 * NOTE(review): presumably referenced by the generated decoders -- confirm.
 */
#define EX_SH(amount) \
static int ex_shift_##amount(DisasContext *ctx, int imm) \
{ \
return imm << amount; \
}
/* Instantiate the shift helpers for 1, 2, 3, 4 and 12 bits. */
EX_SH(1)
EX_SH(2)
EX_SH(3)
EX_SH(4)
EX_SH(12)
/*
 * Guard macros for translation functions returning bool: each one makes
 * the enclosing function return false when its requirement is not met.
 */

/* Require that MISA extension bit(s) @ext are present. */
#define REQUIRE_EXT(ctx, ext) do { \
if (!has_ext(ctx, ext)) { \
return false; \
} \
} while (0)
/* Require that is_32bit(ctx) holds. */
#define REQUIRE_32BIT(ctx) do { \
if (!is_32bit(ctx)) { \
return false; \
} \
} while (0)
/* Require that is_32bit(ctx) does not hold. */
#define REQUIRE_64BIT(ctx) do { \
if (is_32bit(ctx)) { \
return false; \
} \
} while (0)
/*
 * Field transform returning 8 + @reg; @ctx is unused.
 * NOTE(review): presumably maps a compressed-encoding register field onto
 * the full register number -- confirm against the 16-bit decoder.
 */
static int ex_rvc_register(DisasContext *ctx, int reg)
{
return 8 + reg;
}
/*
 * Field transform for a compressed shift amount: a zero @imm is returned
 * as 64, any other value unchanged.  @ctx is unused.
 */
static int ex_rvc_shifti(DisasContext *ctx, int imm)
{
    /* For RV128 a shamt of 0 means a shift by 64. */
    if (imm == 0) {
        return 64;
    }
    return imm;
}
/* Include the auto-generated decoder for 32 bit insn */
#include "decode-insn32.c.inc"
/*
 * Translate a register-immediate operation whose generator takes the
 * immediate as a host integer: rd = func(rs1, imm).
 *
 * @ext selects how rs1 is extended for word operations.  Always returns
 * true.
 */
static bool gen_arith_imm_fn(DisasContext *ctx, arg_i *a, DisasExtend ext,
                             void (*func)(TCGv, TCGv, target_long))
{
    TCGv result = dest_gpr(ctx, a->rd);
    TCGv operand = get_gpr(ctx, a->rs1, ext);

    func(result, operand, a->imm);
    gen_set_gpr(ctx, a->rd, result);

    return true;
}
/*
 * Translate a register-immediate operation whose generator takes the
 * immediate as a TCG value: rd = func(rs1, const(imm)).
 *
 * @ext selects how rs1 is extended for word operations.  Always returns
 * true.
 */
static bool gen_arith_imm_tl(DisasContext *ctx, arg_i *a, DisasExtend ext,
                             void (*func)(TCGv, TCGv, TCGv))
{
    TCGv result = dest_gpr(ctx, a->rd);
    TCGv operand = get_gpr(ctx, a->rs1, ext);
    TCGv immediate = tcg_constant_tl(a->imm);

    func(result, operand, immediate);
    gen_set_gpr(ctx, a->rd, result);

    return true;
}
/*
 * Translate a register-register operation: rd = func(rs1, rs2).
 *
 * @ext selects how both sources are extended for word operations.  Always
 * returns true.
 */
static bool gen_arith(DisasContext *ctx, arg_r *a, DisasExtend ext,
                      void (*func)(TCGv, TCGv, TCGv))
{
    TCGv result = dest_gpr(ctx, a->rd);
    TCGv lhs = get_gpr(ctx, a->rs1, ext);
    TCGv rhs = get_gpr(ctx, a->rs2, ext);

    func(result, lhs, rhs);
    gen_set_gpr(ctx, a->rd, result);

    return true;
}
/*
 * Translate a shift by immediate whose generator takes the shift amount as
 * a host integer: rd = func(rs1, shamt).
 *
 * Returns false, without emitting anything, when the shift amount is not
 * smaller than the operation width; true otherwise.
 */
static bool gen_shift_imm_fn(DisasContext *ctx, arg_shift *a, DisasExtend ext,
                             void (*func)(TCGv, TCGv, target_long))
{
    TCGv result, operand;

    if (a->shamt >= oper_len(ctx)) {
        return false;
    }

    result = dest_gpr(ctx, a->rd);
    operand = get_gpr(ctx, a->rs1, ext);

    func(result, operand, a->shamt);
    gen_set_gpr(ctx, a->rd, result);

    return true;
}
/*
 * Translate a shift by immediate whose generator takes the shift amount as
 * a TCG value: rd = func(rs1, const(shamt)).
 *
 * Returns false, without emitting anything, when the shift amount is not
 * smaller than the operation width; true otherwise.
 */
static bool gen_shift_imm_tl(DisasContext *ctx, arg_shift *a, DisasExtend ext,
                             void (*func)(TCGv, TCGv, TCGv))
{
    TCGv result, operand, amount;

    if (a->shamt >= oper_len(ctx)) {
        return false;
    }

    result = dest_gpr(ctx, a->rd);
    operand = get_gpr(ctx, a->rs1, ext);
    amount = tcg_constant_tl(a->shamt);

    func(result, operand, amount);
    gen_set_gpr(ctx, a->rd, result);

    return true;
}
/*
 * Translate a shift by register: rd = func(rs1, rs2 & (width - 1)).
 *
 * rs2 is read unextended and masked to the operation width; @ext applies
 * to rs1 only.  Always returns true.
 */
static bool gen_shift(DisasContext *ctx, arg_r *a, DisasExtend ext,
                      void (*func)(TCGv, TCGv, TCGv))
{
    TCGv result = dest_gpr(ctx, a->rd);
    TCGv operand = get_gpr(ctx, a->rs1, ext);
    TCGv raw_amount = get_gpr(ctx, a->rs2, EXT_NONE);
    TCGv amount = tcg_temp_new();

    /* Only the low bits of the shift amount are significant. */
    tcg_gen_andi_tl(amount, raw_amount, oper_len(ctx) - 1);
    func(result, operand, amount);
    gen_set_gpr(ctx, a->rd, result);

    tcg_temp_free(amount);
    return true;
}
/*
 * Translate a single-source operation: rd = func(rs1).
 *
 * @ext selects how rs1 is extended for word operations.  Always returns
 * true.
 */
static bool gen_unary(DisasContext *ctx, arg_r2 *a, DisasExtend ext,
                      void (*func)(TCGv, TCGv))
{
    TCGv result = dest_gpr(ctx, a->rd);
    TCGv operand = get_gpr(ctx, a->rs1, ext);

    func(result, operand);
    gen_set_gpr(ctx, a->rd, result);

    return true;
}
/*
 * Fetch the 32-bit instruction word at @pc via cpu_ldl_code(), using the
 * CPU recorded in the translation context.
 */
static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    CPURISCVState *env = ctx->cs->env_ptr;

    return cpu_ldl_code(env, pc);
}
/* Include insn module translation function */
#include "insn_trans/trans_rvi.c.inc"
#include "insn_trans/trans_rvm.c.inc"
#include "insn_trans/trans_rva.c.inc"
#include "insn_trans/trans_rvf.c.inc"
#include "insn_trans/trans_rvd.c.inc"
#include "insn_trans/trans_rvh.c.inc"
#include "insn_trans/trans_rvv.c.inc"
#include "insn_trans/trans_rvb.c.inc"
#include "insn_trans/trans_privileged.c.inc"
/* Include the auto-generated decoder for 16 bit insn */
#include "decode-insn16.c.inc"
/*
 * Decode and translate one instruction whose first halfword is @opcode.
 *
 * Low two bits == 3 select a 32-bit encoding: the second halfword is
 * fetched and the word is passed to decode_insn32().  Anything else is a
 * compressed encoding, handed to decode_insn16() when RVC is present.
 * ctx->pc_succ_insn is advanced by the instruction length before decoding;
 * an undecodable instruction, or a compressed one without RVC, raises an
 * illegal-instruction exception.
 */
static void decode_opc(CPURISCVState *env, DisasContext *ctx, uint16_t opcode)
{
    uint32_t opcode32;

    if (extract16(opcode, 0, 2) == 3) {
        /* Full-size insn: combine with the upper halfword. */
        opcode32 = deposit32(opcode, 16, 16,
                             translator_lduw(env, &ctx->base,
                                             ctx->base.pc_next + 2));
        ctx->pc_succ_insn = ctx->base.pc_next + 4;
        if (!decode_insn32(ctx, opcode32)) {
            gen_exception_illegal(ctx);
        }
        return;
    }

    /* Compressed insn. */
    if (!has_ext(ctx, RVC)) {
        gen_exception_illegal(ctx);
        return;
    }
    ctx->pc_succ_insn = ctx->base.pc_next + 2;
    if (!decode_insn16(ctx, opcode)) {
        gen_exception_illegal(ctx);
    }
}
/*
 * Translator hook: initialise the RISC-V DisasContext for a new TB from
 * the CPU state, the CPU configuration and the TB flags.
 */
static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    RISCVCPU *cpu = RISCV_CPU(cs);
    CPURISCVState *env = cs->env_ptr;
    uint32_t tb_flags = ctx->base.tb->flags;

    /* State taken from the CPU and its configuration. */
    ctx->cs = cs;
    ctx->misa = env->misa;
    ctx->priv_ver = env->priv_ver;
    ctx->ext_ifencei = cpu->cfg.ext_ifencei;
    ctx->vlen = cpu->cfg.vlen;
#if defined(CONFIG_USER_ONLY)
    ctx->virt_enabled = false;
#else
    ctx->virt_enabled = riscv_has_ext(env, RVH) &&
                        riscv_cpu_virt_enabled(env);
#endif

    /* State decoded from the TB flags. */
    ctx->mem_idx = tb_flags & TB_FLAGS_MMU_MASK;
    ctx->mstatus_fs = tb_flags & TB_FLAGS_MSTATUS_FS;
    ctx->mstatus_hs_fs = FIELD_EX32(tb_flags, TB_FLAGS, MSTATUS_HS_FS);
    ctx->hlsx = FIELD_EX32(tb_flags, TB_FLAGS, HLSX);
    ctx->vill = FIELD_EX32(tb_flags, TB_FLAGS, VILL);
    ctx->sew = FIELD_EX32(tb_flags, TB_FLAGS, SEW);
    ctx->lmul = FIELD_EX32(tb_flags, TB_FLAGS, LMUL);
    ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
    /* Derived from the sew and lmul values set just above. */
    ctx->mlen = 1 << (ctx->sew + 3 - ctx->lmul);

    /* Per-TB and per-insn translation state. */
    ctx->pc_succ_insn = ctx->base.pc_first;
    ctx->frm = -1; /* unknown rounding mode */
    ctx->w = false;
    ctx->ntemp = 0;
    memset(ctx->temp, 0, sizeof(ctx->temp));
    ctx->zero = tcg_constant_tl(0);
}
/* Translator hook run at the start of a TB; nothing to do for RISC-V. */
static void riscv_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
}
/*
 * Translator hook run before each instruction: emit the insn_start marker
 * carrying the pc of the instruction about to be translated.
 */
static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
{
    tcg_gen_insn_start(dcbase->pc_next);
}
/*
 * Translator hook: translate the instruction at base.pc_next.
 *
 * After decoding, pc_next is advanced to the following instruction, the
 * word-operation flag is cleared and the temporaries handed out by
 * temp_new() are released.  If translation would otherwise continue, the
 * TB is ended with DISAS_TOO_MANY once pc_next has left the page in which
 * the TB started.
 */
static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);
    CPURISCVState *env = cpu->env_ptr;
    uint16_t opcode16 = translator_lduw(env, &ctx->base, ctx->base.pc_next);
    target_ulong page_start;

    decode_opc(env, ctx, opcode16);
    ctx->base.pc_next = ctx->pc_succ_insn;
    ctx->w = false;

    /* Release the per-insn temporaries, most recently allocated first. */
    while (ctx->ntemp > 0) {
        ctx->ntemp--;
        tcg_temp_free(ctx->temp[ctx->ntemp]);
        ctx->temp[ctx->ntemp] = NULL;
    }

    if (ctx->base.is_jmp != DISAS_NEXT) {
        return;
    }

    page_start = ctx->base.pc_first & TARGET_PAGE_MASK;
    if (ctx->base.pc_next - page_start >= TARGET_PAGE_SIZE) {
        ctx->base.is_jmp = DISAS_TOO_MANY;
    }
}
/*
 * Translator hook run when translation of a TB ends.
 *
 * DISAS_TOO_MANY: the TB was cut short, so emit a jump to the next pc
 *                 through gen_goto_tb() (jump slot 0).
 * DISAS_NORETURN: the exit has already been emitted; nothing to do.
 * Any other state is a translator bug and aborts.
 */
static void riscv_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
{
    DisasContext *ctx = container_of(dcbase, DisasContext, base);

    switch (ctx->base.is_jmp) {
    case DISAS_TOO_MANY:
        gen_goto_tb(ctx, 0, ctx->base.pc_next);
        break;
    case DISAS_NORETURN:
        break;
    default:
        g_assert_not_reached();
    }
}
/*
 * Translator hook: log the guest code of a TB.
 *
 * Logs the symbol looked up for the first pc, then (system emulation only)
 * the env->priv and env->virt values, then the disassembly of the TB's
 * guest code.
 */
static void riscv_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
{
#ifndef CONFIG_USER_ONLY
RISCVCPU *rvcpu = RISCV_CPU(cpu);
CPURISCVState *env = &rvcpu->env;
#endif
qemu_log("IN: %s\n", lookup_symbol(dcbase->pc_first));
#ifndef CONFIG_USER_ONLY
qemu_log("Priv: "TARGET_FMT_ld"; Virt: "TARGET_FMT_ld"\n", env->priv, env->virt);
#endif
log_target_disas(cpu, dcbase->pc_first, dcbase->tb->size);
}
/* RISC-V hooks handed to translator_loop() by gen_intermediate_code(). */
static const TranslatorOps riscv_tr_ops = {
.init_disas_context = riscv_tr_init_disas_context,
.tb_start = riscv_tr_tb_start,
.insn_start = riscv_tr_insn_start,
.translate_insn = riscv_tr_translate_insn,
.tb_stop = riscv_tr_tb_stop,
.disas_log = riscv_tr_disas_log,
};
/*
 * Translate guest code for @tb on @cs by running translator_loop() with
 * the RISC-V hooks.  The DisasContext lives on the stack and is filled in
 * by riscv_tr_init_disas_context().
 */
void gen_intermediate_code(CPUState *cs, TranslationBlock *tb, int max_insns)
{
DisasContext ctx;
translator_loop(&riscv_tr_ops, &ctx.base, cs, tb, max_insns);
}
/*
 * Create the TCG globals used by the translator, each bound to the
 * matching CPURISCVState field: integer registers 1..31, the 32
 * floating-point registers, pc, vl, load_res and load_val.
 */
void riscv_translate_init(void)
{
int i;
/*
* cpu_gpr[0] is a placeholder for the zero register. Do not use it.
* Use the gen_set_gpr and get_gpr helper functions when accessing regs,
* unless you specifically block reads/writes to reg 0.
*/
cpu_gpr[0] = NULL;
/* Integer registers; index 0 is deliberately skipped. */
for (i = 1; i < 32; i++) {
cpu_gpr[i] = tcg_global_mem_new(cpu_env,
offsetof(CPURISCVState, gpr[i]), riscv_int_regnames[i]);
}
/* Floating-point registers, registered as 64-bit globals. */
for (i = 0; i < 32; i++) {
cpu_fpr[i] = tcg_global_mem_new_i64(cpu_env,
offsetof(CPURISCVState, fpr[i]), riscv_fpr_regnames[i]);
}
cpu_pc = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, pc), "pc");
cpu_vl = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, vl), "vl");
load_res = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_res),
"load_res");
load_val = tcg_global_mem_new(cpu_env, offsetof(CPURISCVState, load_val),
"load_val");
}