From 29f3ff8d6cbc28f79933aeaa25805408d0984a8f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Fri, 10 Jul 2015 18:03:31 +0200 Subject: [PATCH 01/18] tcg/optimize: fix constant signedness By convention, on a 64-bit host TCG internally stores 32-bit constants as sign-extended. This is not the case in the optimizer when a 32-bit constant is folded. This doesn't seem to have more consequences than suboptimal code generation. For instance the x86 backend assumes sign-extended constants, and in some rare cases uses a 32-bit unsigned immediate 0xffffffff instead of a 8-bit signed immediate 0xff for the constant -1. This is with a ppc guest: before ------ ---- 0x9f29cc movi_i32 tmp1,$0xffffffff movi_i32 tmp2,$0x0 add2_i32 tmp0,CA,CA,tmp2,r6,tmp2 add2_i32 tmp0,CA,tmp0,CA,tmp1,tmp2 mov_i32 r10,tmp0 0x7fd8c7dfe90c: xor %ebp,%ebp 0x7fd8c7dfe90e: mov %ebp,%r11d 0x7fd8c7dfe911: mov 0x18(%r14),%r9d 0x7fd8c7dfe915: add %r9d,%r10d 0x7fd8c7dfe918: adc %ebp,%r11d 0x7fd8c7dfe91b: add $0xffffffff,%r10d 0x7fd8c7dfe922: adc %ebp,%r11d 0x7fd8c7dfe925: mov %r11d,0x134(%r14) 0x7fd8c7dfe92c: mov %r10d,0x28(%r14) after ----- ---- 0x9f29cc movi_i32 tmp1,$0xffffffffffffffff movi_i32 tmp2,$0x0 add2_i32 tmp0,CA,CA,tmp2,r6,tmp2 add2_i32 tmp0,CA,tmp0,CA,tmp1,tmp2 mov_i32 r10,tmp0 0x7f37010d490c: xor %ebp,%ebp 0x7f37010d490e: mov %ebp,%r11d 0x7f37010d4911: mov 0x18(%r14),%r9d 0x7f37010d4915: add %r9d,%r10d 0x7f37010d4918: adc %ebp,%r11d 0x7f37010d491b: add $0xffffffffffffffff,%r10d 0x7f37010d491f: adc %ebp,%r11d 0x7f37010d4922: mov %r11d,0x134(%r14) 0x7f37010d4929: mov %r10d,0x28(%r14) Signed-off-by: Aurelien Jarno Message-Id: <1436544211-2769-2-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/optimize.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 18283cfd7b..cd0e793a9d 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -395,7 +395,7 @@ static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, 
TCGArg y) { TCGArg res = do_constant_folding_2(op, x, y); if (op_bits(op) == 32) { - res &= 0xffffffff; + res = (int32_t)res; } return res; } @@ -1128,8 +1128,8 @@ void tcg_optimize(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a); - tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (int32_t)a); + tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; @@ -1149,8 +1149,8 @@ void tcg_optimize(TCGContext *s) rl = args[0]; rh = args[1]; - tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r); - tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32)); + tcg_opt_gen_movi(s, op, args, rl, (int32_t)r); + tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32)); /* We've done all we need to do with the movi. Skip it. */ oi_next = op2->next; From 1208d7dd5fddc1fbd98de800d17429b4e5578848 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:44 +0200 Subject: [PATCH 02/18] tcg/optimize: optimize temps tracking The tcg_temp_info structure uses 24 bytes per temp. Now that we emulate vector registers on most guests, it's not uncommon to have more than 100 used temps. This means we have to initialize more than 2kB at least twice per TB, often more when there are a few goto_tb. Instead, use a TCGTempSet bit array to track which temps are in use in the current basic block. This means there are only around 16 bytes to initialize. This improves the boot time of a MIPS guest on an x86-64 host by around 7% and moves out tcg_optimize from the top of the profiler list. 
[rth: Handle TCG_CALL_DUMMY_ARG] Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/optimize.c | 43 ++++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index cd0e793a9d..413920f261 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -50,6 +50,7 @@ struct tcg_temp_info { }; static struct tcg_temp_info temps[TCG_MAX_TEMPS]; +static TCGTempSet temps_used; /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove the copy flag from the left temp. */ @@ -67,6 +68,22 @@ static void reset_temp(TCGArg temp) temps[temp].mask = -1; } +/* Reset all temporaries, given that there are NB_TEMPS of them. */ +static void reset_all_temps(int nb_temps) +{ + bitmap_zero(temps_used.l, nb_temps); +} + +/* Initialize and activate a temporary. */ +static void init_temp_info(TCGArg temp) +{ + if (!test_bit(temp, temps_used.l)) { + temps[temp].state = TCG_TEMP_UNDEF; + temps[temp].mask = -1; + set_bit(temp, temps_used.l); + } +} + static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc, int nargs) { @@ -98,16 +115,6 @@ static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, return new_op; } -/* Reset all temporaries, given that there are NB_TEMPS of them. 
*/ -static void reset_all_temps(int nb_temps) -{ - int i; - for (i = 0; i < nb_temps; i++) { - temps[i].state = TCG_TEMP_UNDEF; - temps[i].mask = -1; - } -} - static int op_bits(TCGOpcode op) { const TCGOpDef *def = &tcg_op_defs[op]; @@ -598,12 +605,24 @@ void tcg_optimize(TCGContext *s) const TCGOpDef *def = &tcg_op_defs[opc]; oi_next = op->next; + + /* Count the arguments, and initialize the temps that are + going to be used */ if (opc == INDEX_op_call) { nb_oargs = op->callo; nb_iargs = op->calli; + for (i = 0; i < nb_oargs + nb_iargs; i++) { + tmp = args[i]; + if (tmp != TCG_CALL_DUMMY_ARG) { + init_temp_info(tmp); + } + } } else { nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; + for (i = 0; i < nb_oargs + nb_iargs; i++) { + init_temp_info(args[i]); + } } /* Do copy propagation */ @@ -1299,7 +1318,9 @@ void tcg_optimize(TCGContext *s) if (!(args[nb_oargs + nb_iargs + 1] & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) { for (i = 0; i < nb_globals; i++) { - reset_temp(i); + if (test_bit(i, temps_used.l)) { + reset_temp(i); + } } } goto do_reset_output; From d9c769c60948815ee03b2684b1c1c68ee4375149 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:44 +0200 Subject: [PATCH 03/18] tcg/optimize: add temp_is_const and temp_is_copy functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add two accessor functions temp_is_const and temp_is_copy, to make the code more readable and make code change easier. 
Reviewed-by: Alex Bennée Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/optimize.c | 131 ++++++++++++++++++++++--------------------------- 1 file changed, 60 insertions(+), 71 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 413920f261..c058d04ec0 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -52,11 +52,21 @@ struct tcg_temp_info { static struct tcg_temp_info temps[TCG_MAX_TEMPS]; static TCGTempSet temps_used; +static inline bool temp_is_const(TCGArg arg) +{ + return temps[arg].state == TCG_TEMP_CONST; +} + +static inline bool temp_is_copy(TCGArg arg) +{ + return temps[arg].state == TCG_TEMP_COPY; +} + /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove the copy flag from the left temp. */ static void reset_temp(TCGArg temp) { - if (temps[temp].state == TCG_TEMP_COPY) { + if (temp_is_copy(temp)) { if (temps[temp].prev_copy == temps[temp].next_copy) { temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; } else { @@ -186,8 +196,7 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2) return true; } - if (temps[arg1].state != TCG_TEMP_COPY - || temps[arg2].state != TCG_TEMP_COPY) { + if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) { return false; } @@ -230,7 +239,7 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, return; } - if (temps[src].state == TCG_TEMP_CONST) { + if (temp_is_const(src)) { tcg_opt_gen_movi(s, op, args, dst, temps[src].val); return; } @@ -248,10 +257,10 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, } temps[dst].mask = mask; - assert(temps[src].state != TCG_TEMP_CONST); + assert(!temp_is_const(src)); if (s->temps[src].type == s->temps[dst].type) { - if (temps[src].state != TCG_TEMP_COPY) { + if (!temp_is_copy(src)) { temps[src].state = TCG_TEMP_COPY; temps[src].next_copy = src; temps[src].prev_copy = src; @@ -488,7 +497,7 @@ static bool do_constant_folding_cond_eq(TCGCond c) static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg 
x, TCGArg y, TCGCond c) { - if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) { + if (temp_is_const(x) && temp_is_const(y)) { switch (op_bits(op)) { case 32: return do_constant_folding_cond_32(temps[x].val, temps[y].val, c); @@ -499,7 +508,7 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x, } } else if (temps_are_copies(x, y)) { return do_constant_folding_cond_eq(c); - } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) { + } else if (temp_is_const(y) && temps[y].val == 0) { switch (c) { case TCG_COND_LTU: return 0; @@ -520,12 +529,10 @@ static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c) TCGArg al = p1[0], ah = p1[1]; TCGArg bl = p2[0], bh = p2[1]; - if (temps[bl].state == TCG_TEMP_CONST - && temps[bh].state == TCG_TEMP_CONST) { + if (temp_is_const(bl) && temp_is_const(bh)) { uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val; - if (temps[al].state == TCG_TEMP_CONST - && temps[ah].state == TCG_TEMP_CONST) { + if (temp_is_const(al) && temp_is_const(ah)) { uint64_t a; a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val; return do_constant_folding_cond_64(a, b, c); @@ -551,8 +558,8 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) { TCGArg a1 = *p1, a2 = *p2; int sum = 0; - sum += temps[a1].state == TCG_TEMP_CONST; - sum -= temps[a2].state == TCG_TEMP_CONST; + sum += temp_is_const(a1); + sum -= temp_is_const(a2); /* Prefer the constant in second argument, and then the form op a, a, b, which is better handled on non-RISC hosts. 
*/ @@ -567,10 +574,10 @@ static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2) static bool swap_commutative2(TCGArg *p1, TCGArg *p2) { int sum = 0; - sum += temps[p1[0]].state == TCG_TEMP_CONST; - sum += temps[p1[1]].state == TCG_TEMP_CONST; - sum -= temps[p2[0]].state == TCG_TEMP_CONST; - sum -= temps[p2[1]].state == TCG_TEMP_CONST; + sum += temp_is_const(p1[0]); + sum += temp_is_const(p1[1]); + sum -= temp_is_const(p2[0]); + sum -= temp_is_const(p2[1]); if (sum > 0) { TCGArg t; t = p1[0], p1[0] = p2[0], p2[0] = t; @@ -627,7 +634,7 @@ void tcg_optimize(TCGContext *s) /* Do copy propagation */ for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - if (temps[args[i]].state == TCG_TEMP_COPY) { + if (temp_is_copy(args[i])) { args[i] = find_better_copy(s, args[i]); } } @@ -697,8 +704,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(sar): CASE_OP_32_64(rotl): CASE_OP_32_64(rotr): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (temp_is_const(args[1]) && temps[args[1]].val == 0) { tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } @@ -708,7 +714,7 @@ void tcg_optimize(TCGContext *s) TCGOpcode neg_op; bool have_neg; - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { /* Proceed with possible constant folding. 
*/ break; } @@ -722,8 +728,7 @@ void tcg_optimize(TCGContext *s) if (!have_neg) { break; } - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (temp_is_const(args[1]) && temps[args[1]].val == 0) { op->opc = neg_op; reset_temp(args[0]); args[1] = args[2]; @@ -733,34 +738,30 @@ void tcg_optimize(TCGContext *s) break; CASE_OP_32_64(xor): CASE_OP_32_64(nand): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == -1) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == -1) { i = 1; goto try_not; } break; CASE_OP_32_64(nor): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == 0) { i = 1; goto try_not; } break; CASE_OP_32_64(andc): - if (temps[args[2]].state != TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == -1) { + if (!temp_is_const(args[2]) + && temp_is_const(args[1]) && temps[args[1]].val == -1) { i = 2; goto try_not; } break; CASE_OP_32_64(orc): CASE_OP_32_64(eqv): - if (temps[args[2]].state != TCG_TEMP_CONST - && temps[args[1]].state == TCG_TEMP_CONST - && temps[args[1]].val == 0) { + if (!temp_is_const(args[2]) + && temp_is_const(args[1]) && temps[args[1]].val == 0) { i = 2; goto try_not; } @@ -801,9 +802,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(or): CASE_OP_32_64(xor): CASE_OP_32_64(andc): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == 0) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -811,9 +811,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(and): CASE_OP_32_64(orc): CASE_OP_32_64(eqv): - if (temps[args[1]].state != TCG_TEMP_CONST - && temps[args[2]].state == 
TCG_TEMP_CONST - && temps[args[2]].val == -1) { + if (!temp_is_const(args[1]) + && temp_is_const(args[2]) && temps[args[2]].val == -1) { tcg_opt_gen_mov(s, op, args, args[0], args[1]); continue; } @@ -851,7 +850,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(and): mask = temps[args[2]].mask; - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { and_const: affected = temps[args[1]].mask & ~mask; } @@ -861,7 +860,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(andc): /* Known-zeros does not imply known-ones. Therefore unless args[2] is constant, we can't infer anything from it. */ - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { mask = ~temps[args[2]].mask; goto and_const; } @@ -870,26 +869,26 @@ void tcg_optimize(TCGContext *s) break; case INDEX_op_sar_i32: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 31; mask = (int32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_sar_i64: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 63; mask = (int64_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i32: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 31; mask = (uint32_t)temps[args[1]].mask >> tmp; } break; case INDEX_op_shr_i64: - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & 63; mask = (uint64_t)temps[args[1]].mask >> tmp; } @@ -900,7 +899,7 @@ void tcg_optimize(TCGContext *s) break; CASE_OP_32_64(shl): - if (temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2])) { tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1); mask = temps[args[1]].mask << tmp; } @@ -981,8 +980,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(mul): CASE_OP_32_64(muluh): CASE_OP_32_64(mulsh): - if ((temps[args[2]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0)) { 
+ if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) { tcg_opt_gen_movi(s, op, args, args[0], 0); continue; } @@ -1037,7 +1035,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16u): case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: - if (temps[args[1]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); tcg_opt_gen_movi(s, op, args, args[0], tmp); break; @@ -1045,7 +1043,7 @@ void tcg_optimize(TCGContext *s) goto do_default; case INDEX_op_trunc_shr_i32: - if (temps[args[1]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); tcg_opt_gen_movi(s, op, args, args[0], tmp); break; @@ -1074,8 +1072,7 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(divu): CASE_OP_32_64(rem): CASE_OP_32_64(remu): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1]) && temp_is_const(args[2])) { tmp = do_constant_folding(opc, temps[args[1]].val, temps[args[2]].val); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1084,8 +1081,7 @@ void tcg_optimize(TCGContext *s) goto do_default; CASE_OP_32_64(deposit): - if (temps[args[1]].state == TCG_TEMP_CONST - && temps[args[2]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[1]) && temp_is_const(args[2])) { tmp = deposit64(temps[args[1]].val, args[3], args[4], temps[args[2]].val); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1125,10 +1121,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_add2_i32: case INDEX_op_sub2_i32: - if (temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[4]].state == TCG_TEMP_CONST - && temps[args[5]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2]) && temp_is_const(args[3]) + && temp_is_const(args[4]) && temp_is_const(args[5])) { uint32_t al = temps[args[2]].val; uint32_t ah = temps[args[3]].val; uint32_t bl = temps[args[4]].val; @@ -1157,8 +1151,7 @@ 
void tcg_optimize(TCGContext *s) goto do_default; case INDEX_op_mulu2_i32: - if (temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST) { + if (temp_is_const(args[2]) && temp_is_const(args[3])) { uint32_t a = temps[args[2]].val; uint32_t b = temps[args[3]].val; uint64_t r = (uint64_t)a * b; @@ -1190,10 +1183,8 @@ void tcg_optimize(TCGContext *s) tcg_op_remove(s, op); } } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE) - && temps[args[2]].state == TCG_TEMP_CONST - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[2]].val == 0 - && temps[args[3]].val == 0) { + && temp_is_const(args[2]) && temps[args[2]].val == 0 + && temp_is_const(args[3]) && temps[args[3]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ do_brcond_high: @@ -1255,10 +1246,8 @@ void tcg_optimize(TCGContext *s) do_setcond_const: tcg_opt_gen_movi(s, op, args, args[0], tmp); } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE) - && temps[args[3]].state == TCG_TEMP_CONST - && temps[args[4]].state == TCG_TEMP_CONST - && temps[args[3]].val == 0 - && temps[args[4]].val == 0) { + && temp_is_const(args[3]) && temps[args[3]].val == 0 + && temp_is_const(args[4]) && temps[args[4]].val == 0) { /* Simplify LT/GE comparisons vs zero to a single compare vs the high word of the input. */ do_setcond_high: From b41059dd9deec367a4ccd296659f0bc5de2dc705 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:44 +0200 Subject: [PATCH 04/18] tcg/optimize: track const/copy status separately Instead of using an enum which could be either a copy or a const, track them separately. This will be used in the next patch. Constants are tracked through a bool. Copies are tracked by initializing temp's next_copy and prev_copy to itself, allowing to simplify the code a bit. 
Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/optimize.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index c058d04ec0..5c60e1c228 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -35,14 +35,8 @@ glue(glue(case INDEX_op_, x), _i32): \ glue(glue(case INDEX_op_, x), _i64) -typedef enum { - TCG_TEMP_UNDEF = 0, - TCG_TEMP_CONST, - TCG_TEMP_COPY, -} tcg_temp_state; - struct tcg_temp_info { - tcg_temp_state state; + bool is_const; uint16_t prev_copy; uint16_t next_copy; tcg_target_ulong val; @@ -54,27 +48,22 @@ static TCGTempSet temps_used; static inline bool temp_is_const(TCGArg arg) { - return temps[arg].state == TCG_TEMP_CONST; + return temps[arg].is_const; } static inline bool temp_is_copy(TCGArg arg) { - return temps[arg].state == TCG_TEMP_COPY; + return temps[arg].next_copy != arg; } -/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove - the copy flag from the left temp. */ +/* Reset TEMP's state, possibly removing the temp for the list of copies. 
*/ static void reset_temp(TCGArg temp) { - if (temp_is_copy(temp)) { - if (temps[temp].prev_copy == temps[temp].next_copy) { - temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; - } else { - temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; - temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; - } - } - temps[temp].state = TCG_TEMP_UNDEF; + temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; + temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; + temps[temp].next_copy = temp; + temps[temp].prev_copy = temp; + temps[temp].is_const = false; temps[temp].mask = -1; } @@ -88,7 +77,9 @@ static void reset_all_temps(int nb_temps) static void init_temp_info(TCGArg temp) { if (!test_bit(temp, temps_used.l)) { - temps[temp].state = TCG_TEMP_UNDEF; + temps[temp].next_copy = temp; + temps[temp].prev_copy = temp; + temps[temp].is_const = false; temps[temp].mask = -1; set_bit(temp, temps_used.l); } @@ -218,7 +209,7 @@ static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, op->opc = new_op; reset_temp(dst); - temps[dst].state = TCG_TEMP_CONST; + temps[dst].is_const = true; temps[dst].val = val; mask = val; if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) { @@ -260,16 +251,11 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, assert(!temp_is_const(src)); if (s->temps[src].type == s->temps[dst].type) { - if (!temp_is_copy(src)) { - temps[src].state = TCG_TEMP_COPY; - temps[src].next_copy = src; - temps[src].prev_copy = src; - } - temps[dst].state = TCG_TEMP_COPY; temps[dst].next_copy = temps[src].next_copy; temps[dst].prev_copy = src; temps[temps[dst].next_copy].prev_copy = dst; temps[src].next_copy = dst; + temps[dst].is_const = false; } args[0] = dst; From 299f80130401153af1a6ddb3cc011781bcd47600 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:44 +0200 Subject: [PATCH 05/18] tcg/optimize: allow constant to have copies Now that copies and constants are 
tracked separately, we can allow constants to have copies, deferring the choice to use a register or a constant to the register allocation pass. This prevents this kind of regular constant reloading: -OUT: [size=338] +OUT: [size=298] mov -0x4(%r14),%ebp test %ebp,%ebp jne 0x7ffbe9cb0ed6 mov $0x40002219f8,%rbp mov %rbp,(%r14) - mov $0x40002219f8,%rbp mov $0x4000221a20,%rbx mov %rbp,(%rbx) mov $0x4000000000,%rbp mov %rbp,(%r14) - mov $0x4000000000,%rbp mov $0x4000221d38,%rbx mov %rbp,(%rbx) mov $0x40002221a8,%rbp mov %rbp,(%r14) - mov $0x40002221a8,%rbp mov $0x4000221d40,%rbx mov %rbp,(%rbx) mov $0x4000019170,%rbp mov %rbp,(%r14) - mov $0x4000019170,%rbp mov $0x4000221d48,%rbx mov %rbp,(%rbx) mov $0x40000049ee,%rbp mov %rbp,0x80(%r14) mov %r14,%rdi callq 0x7ffbe99924d0 mov $0x4000001680,%rbp mov %rbp,0x30(%r14) mov 0x10(%r14),%rbp mov $0x4000001680,%rbp mov %rbp,0x30(%r14) mov 0x10(%r14),%rbp shl $0x20,%rbp mov (%r14),%rbx mov %ebx,%ebx mov %rbx,(%r14) or %rbx,%rbp mov %rbp,0x10(%r14) mov %rbp,0x90(%r14) mov 0x60(%r14),%rbx mov %rbx,0x38(%r14) mov 0x28(%r14),%rbx mov $0x4000220e60,%r12 mov %rbx,(%r12) mov $0x40002219c8,%rbx mov %rbp,(%rbx) mov 0x20(%r14),%rbp sub $0x8,%rbp mov $0x4000004a16,%rbx mov %rbx,0x0(%rbp) mov %rbp,0x20(%r14) mov $0x19,%ebp mov %ebp,0xa8(%r14) mov $0x4000015110,%rbp mov %rbp,0x80(%r14) xor %eax,%eax jmpq 0x7ffbebcae426 lea -0x5f6d72a(%rip),%rax # 0x7ffbe3d437b3 jmpq 0x7ffbebcae426 Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/optimize.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/tcg/optimize.c b/tcg/optimize.c index 5c60e1c228..a1edfd581b 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -230,11 +230,6 @@ static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, return; } - if (temp_is_const(src)) { - tcg_opt_gen_movi(s, op, args, dst, temps[src].val); - return; - } - TCGOpcode new_op = op_to_mov(op->opc); tcg_target_ulong mask; @@ -248,14 +243,13 @@ static void 
tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, } temps[dst].mask = mask; - assert(!temp_is_const(src)); - if (s->temps[src].type == s->temps[dst].type) { temps[dst].next_copy = temps[src].next_copy; temps[dst].prev_copy = src; temps[temps[dst].next_copy].prev_copy = dst; temps[src].next_copy = dst; - temps[dst].is_const = false; + temps[dst].is_const = temps[src].is_const; + temps[dst].val = temps[src].val; } args[0] = dst; From 0632e555fc4d281d69cb08d98d500d96185b041f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:45 +0200 Subject: [PATCH 06/18] tcg: rename trunc_shr_i32 into trunc_shr_i64_i32 The op is sometimes named trunc_shr_i32 and sometimes trunc_shr_i64_i32, and the name in the README doesn't match the name offered to the frontends. Always use the long name to make it clear it is a size changing op. Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/README | 2 +- tcg/aarch64/tcg-target.h | 2 +- tcg/i386/tcg-target.h | 2 +- tcg/ia64/tcg-target.h | 2 +- tcg/optimize.c | 6 +++--- tcg/ppc/tcg-target.h | 2 +- tcg/s390/tcg-target.h | 2 +- tcg/sparc/tcg-target.c | 4 ++-- tcg/sparc/tcg-target.h | 2 +- tcg/tcg-op.c | 4 ++-- tcg/tcg-opc.h | 4 ++-- tcg/tcg.h | 2 +- tcg/tci/tcg-target.h | 2 +- 13 files changed, 18 insertions(+), 18 deletions(-) diff --git a/tcg/README b/tcg/README index a550ff176d..61b3899622 100644 --- a/tcg/README +++ b/tcg/README @@ -314,7 +314,7 @@ This operation would be equivalent to dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) -* trunc_shr_i32 t0, t1, pos +* trunc_shr_i64_i32 t0, t1, pos For 64-bit hosts only, right shift the 64-bit input T1 by POS and truncate to 32-bit output T0. 
Depending on the host, this may be diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 8aec04d2bf..dfd880131b 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -70,7 +70,7 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 25b513354c..dae50ba368 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -102,7 +102,7 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index a04ed81262..29902f987a 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -160,7 +160,7 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/optimize.c b/tcg/optimize.c index a1edfd581b..47f41472f1 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -288,7 +288,7 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_shr_i32: return (uint32_t)x >> (y & 31); - case INDEX_op_trunc_shr_i32: + case INDEX_op_trunc_shr_i64_i32: case INDEX_op_shr_i64: return (uint64_t)x >> (y & 63); @@ -874,7 +874,7 @@ void tcg_optimize(TCGContext *s) } break; - case INDEX_op_trunc_shr_i32: + case INDEX_op_trunc_shr_i64_i32: mask = (uint64_t)temps[args[1]].mask >> args[2]; 
break; @@ -1022,7 +1022,7 @@ void tcg_optimize(TCGContext *s) } goto do_default; - case INDEX_op_trunc_shr_i32: + case INDEX_op_trunc_shr_i64_i32: if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); tcg_opt_gen_movi(s, op, args, args[0], tmp); diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 7ce7048824..b7e6861b79 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -77,7 +77,7 @@ typedef enum { #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 91576d5949..50016a874a 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -72,7 +72,7 @@ typedef enum TCGReg { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 1a870a81d7..b23032bb49 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -1413,7 +1413,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext32u_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); break; - case INDEX_op_trunc_shr_i32: + case INDEX_op_trunc_shr_i64_i32: if (a2 == 0) { tcg_out_mov(s, TCG_TYPE_I32, a0, a1); } else { @@ -1533,7 +1533,7 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_ext32s_i64, { "R", "r" } }, { INDEX_op_ext32u_i64, { "R", "r" } }, - { INDEX_op_trunc_shr_i32, { "r", "R" } }, + { INDEX_op_trunc_shr_i64_i32, { "r", "R" } }, { INDEX_op_brcond_i64, { "RZ", "RJ" } }, { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, diff --git a/tcg/sparc/tcg-target.h 
b/tcg/sparc/tcg-target.h index f584de4766..336c47fbd2 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -118,7 +118,7 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i32 1 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 45098c310e..61b64db092 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1751,8 +1751,8 @@ void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) tcg_gen_mov_i32(ret, TCGV_LOW(t)); tcg_temp_free_i64(t); } - } else if (TCG_TARGET_HAS_trunc_shr_i32) { - tcg_gen_op3i_i32(INDEX_op_trunc_shr_i32, ret, + } else if (TCG_TARGET_HAS_trunc_shr_i64_i32) { + tcg_gen_op3i_i32(INDEX_op_trunc_shr_i64_i32, ret, MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); } else if (count == 0) { tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 13ccb60a5d..4a34f43a9e 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -138,8 +138,8 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) -DEF(trunc_shr_i32, 1, 1, 1, - IMPL(TCG_TARGET_HAS_trunc_shr_i32) +DEF(trunc_shr_i64_i32, 1, 1, 1, + IMPL(TCG_TARGET_HAS_trunc_shr_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) diff --git a/tcg/tcg.h b/tcg/tcg.h index 231a781524..e7e33b9bb8 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -66,7 +66,7 @@ typedef uint64_t TCGRegSet; #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. 
*/ -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index cbf3f9b5a6..8b1139b3f2 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -84,7 +84,7 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i32 0 +#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 From 6acd2558fdb7dd9de6b10697914bdc1d75d624e5 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:45 +0200 Subject: [PATCH 07/18] tcg: don't abuse TCG type in tcg_gen_trunc_shr_i64_i32 The tcg_gen_trunc_shr_i64_i32 function takes a 64-bit argument and returns a 32-bit value. Directly call tcg_gen_op3 with the correct types instead of calling tcg_gen_op3i_i32 and abusing the TCG types. Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/tcg-op.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 61b64db092..0e79fd1dc6 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1752,8 +1752,8 @@ void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) tcg_temp_free_i64(t); } } else if (TCG_TARGET_HAS_trunc_shr_i64_i32) { - tcg_gen_op3i_i32(INDEX_op_trunc_shr_i64_i32, ret, - MAKE_TCGV_I32(GET_TCGV_I64(arg)), count); + tcg_gen_op3(&tcg_ctx, INDEX_op_trunc_shr_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg), count); } else if (count == 0) { tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); } else { From 4f2331e5b67af8172419eb1c8db510b497b30a7b Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:45 +0200 Subject: [PATCH 08/18] tcg: implement real ext_i32_i64 and extu_i32_i64 ops Implement real ext_i32_i64 and extu_i32_i64 ops. 
They ensure that a 32-bit value is always converted to a 64-bit value and not propagated through the register allocator or the optimizer. Cc: Andrzej Zaborowski Cc: Alexander Graf Cc: Blue Swirl Cc: Stefan Weil Acked-by: Claudio Fontana Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c | 4 ++++ tcg/i386/tcg-target.c | 5 +++++ tcg/ia64/tcg-target.c | 4 ++++ tcg/ppc/tcg-target.c | 6 ++++++ tcg/s390/tcg-target.c | 5 +++++ tcg/sparc/tcg-target.c | 8 ++++++-- tcg/tcg-op.c | 10 ++++------ tcg/tcg-opc.h | 3 +++ tcg/tci/tcg-target.c | 4 ++++ tci.c | 6 ++++-- 10 files changed, 45 insertions(+), 10 deletions(-) diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index b7ec4f5ace..7f7ab7e9aa 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1556,6 +1556,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16s_i32: tcg_out_sxt(s, ext, MO_16, a0, a1); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1); break; @@ -1567,6 +1568,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i32: tcg_out_uxt(s, MO_16, a0, a1); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_movr(s, TCG_TYPE_I32, a0, a1); break; @@ -1712,6 +1714,8 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_ext8u_i64, { "r", "r" } }, { INDEX_op_ext16u_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, { INDEX_op_deposit_i64, { "r", "0", "rZ" } }, diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 887f22f675..7648f7efd4 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -2064,9 +2064,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_bswap64_i64: tcg_out_bswap64(s, args[0]); break; + case INDEX_op_extu_i32_i64: 
case INDEX_op_ext32u_i64: tcg_out_ext32u(s, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_ext32s(s, args[0], args[1]); break; @@ -2201,6 +2203,9 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_ext16u_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_deposit_i64, { "Q", "0", "Q" } }, { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } }, diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 81cb9f79f3..71e79cfbbc 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -2148,9 +2148,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i64: tcg_out_ext(s, OPC_ZXT2_I29, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_ext(s, OPC_SXT4_I29, args[0], args[1]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_ext(s, OPC_ZXT4_I29, args[0], args[1]); break; @@ -2301,6 +2303,8 @@ static const TCGTargetOpDef ia64_op_defs[] = { { INDEX_op_ext16u_i64, { "r", "rZ"} }, { INDEX_op_ext32s_i64, { "r", "rZ"} }, { INDEX_op_ext32u_i64, { "r", "rZ"} }, + { INDEX_op_ext_i32_i64, { "r", "rZ" } }, + { INDEX_op_extu_i32_i64, { "r", "rZ" } }, { INDEX_op_bswap16_i64, { "r", "rZ" } }, { INDEX_op_bswap32_i64, { "r", "rZ" } }, diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 2b6eafa03c..31fa25c421 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -2200,12 +2200,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_ext16s_i64: c = EXTSH; goto gen_ext; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: c = EXTSW; goto gen_ext; gen_ext: tcg_out32(s, c | RS(args[1]) | RA(args[0])); break; + case INDEX_op_extu_i32_i64: + tcg_out_ext32u(s, args[0], args[1]); + break; case INDEX_op_setcond_i32: tcg_out_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1], 
args[2], @@ -2482,6 +2486,8 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_ext8s_i64, { "r", "r" } }, { INDEX_op_ext16s_i64, { "r", "r" } }, { INDEX_op_ext32s_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, { INDEX_op_bswap16_i64, { "r", "r" } }, { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index aa718eca0c..96c3d65d76 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -2090,6 +2090,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16s_i64: tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]); break; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tgen_ext32s(s, args[0], args[1]); break; @@ -2099,6 +2100,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext16u_i64: tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tgen_ext32u(s, args[0], args[1]); break; @@ -2251,6 +2253,9 @@ static const TCGTargetOpDef s390_op_defs[] = { { INDEX_op_ext32s_i64, { "r", "r" } }, { INDEX_op_ext32u_i64, { "r", "r" } }, + { INDEX_op_ext_i32_i64, { "r", "r" } }, + { INDEX_op_extu_i32_i64, { "r", "r" } }, + { INDEX_op_bswap16_i64, { "r", "r" } }, { INDEX_op_bswap32_i64, { "r", "r" } }, { INDEX_op_bswap64_i64, { "r", "r" } }, diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index b23032bb49..fe75af08dc 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -1407,9 +1407,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_divu_i64: c = ARITH_UDIVX; goto gen_arith; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRA); break; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); break; @@ -1531,8 +1533,10 @@ static const TCGTargetOpDef sparc_op_defs[] = { { 
INDEX_op_neg_i64, { "R", "RJ" } }, { INDEX_op_not_i64, { "R", "RJ" } }, - { INDEX_op_ext32s_i64, { "R", "r" } }, - { INDEX_op_ext32u_i64, { "R", "r" } }, + { INDEX_op_ext32s_i64, { "R", "R" } }, + { INDEX_op_ext32u_i64, { "R", "R" } }, + { INDEX_op_ext_i32_i64, { "R", "r" } }, + { INDEX_op_extu_i32_i64, { "R", "r" } }, { INDEX_op_trunc_shr_i64_i32, { "r", "R" } }, { INDEX_op_brcond_i64, { "RZ", "RJ" } }, diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0e79fd1dc6..711431567e 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1770,9 +1770,8 @@ void tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_movi_i32(TCGV_HIGH(ret), 0); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. */ - tcg_gen_ext32u_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_extu_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } @@ -1782,9 +1781,8 @@ void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg) tcg_gen_mov_i32(TCGV_LOW(ret), arg); tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_LOW(ret), 31); } else { - /* Note: we assume the target supports move between - 32 and 64 bit registers. */ - tcg_gen_ext32s_i64(ret, MAKE_TCGV_I64(GET_TCGV_I32(arg))); + tcg_gen_op2(&tcg_ctx, INDEX_op_ext_i32_i64, + GET_TCGV_I64(ret), GET_TCGV_I32(arg)); } } diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 4a34f43a9e..f721a5a007 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -138,6 +138,9 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) +/* size changing ops */ +DEF(ext_i32_i64, 1, 1, 0, IMPL64) +DEF(extu_i32_i64, 1, 1, 0, IMPL64) DEF(trunc_shr_i64_i32, 1, 1, 1, IMPL(TCG_TARGET_HAS_trunc_shr_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? 
TCG_OPF_NOT_PRESENT : 0)) diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c index 83472dbcd8..bbb54d4e8c 100644 --- a/tcg/tci/tcg-target.c +++ b/tcg/tci/tcg-target.c @@ -210,6 +210,8 @@ static const TCGTargetOpDef tcg_target_op_defs[] = { #if TCG_TARGET_HAS_ext32u_i64 { INDEX_op_ext32u_i64, { R, R } }, #endif + { INDEX_op_ext_i32_i64, { R, R } }, + { INDEX_op_extu_i32_i64, { R, R } }, #if TCG_TARGET_HAS_bswap16_i64 { INDEX_op_bswap16_i64, { R, R } }, #endif @@ -701,6 +703,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, case INDEX_op_ext16u_i64: /* Optional (TCG_TARGET_HAS_ext16u_i64). */ case INDEX_op_ext32s_i64: /* Optional (TCG_TARGET_HAS_ext32s_i64). */ case INDEX_op_ext32u_i64: /* Optional (TCG_TARGET_HAS_ext32u_i64). */ + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: #endif /* TCG_TARGET_REG_BITS == 64 */ case INDEX_op_neg_i32: /* Optional (TCG_TARGET_HAS_neg_i32). */ case INDEX_op_not_i32: /* Optional (TCG_TARGET_HAS_not_i32). */ diff --git a/tci.c b/tci.c index 84449489d2..3d6d17783d 100644 --- a/tci.c +++ b/tci.c @@ -1033,18 +1033,20 @@ uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr) #endif #if TCG_TARGET_HAS_ext32s_i64 case INDEX_op_ext32s_i64: +#endif + case INDEX_op_ext_i32_i64: t0 = *tb_ptr++; t1 = tci_read_r32s(&tb_ptr); tci_write_reg64(t0, t1); break; -#endif #if TCG_TARGET_HAS_ext32u_i64 case INDEX_op_ext32u_i64: +#endif + case INDEX_op_extu_i32_i64: t0 = *tb_ptr++; t1 = tci_read_r32(&tb_ptr); tci_write_reg64(t0, t1); break; -#endif #if TCG_TARGET_HAS_bswap16_i64 case INDEX_op_bswap16_i64: TODO(); From 8bcb5c8f34f9215d4f88f388c7ff14c9bd5cecd3 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:45 +0200 Subject: [PATCH 09/18] tcg/optimize: add optimizations for ext_i32_i64 and extu_i32_i64 ops They behave the same as ext32s_i64 and ext32u_i64 from the constant folding and zero propagation point of view, except that they can't be replaced by a mov, so we don't compute 
the affected value. Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/optimize.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tcg/optimize.c b/tcg/optimize.c index 47f41472f1..18046051d3 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -343,9 +343,11 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) CASE_OP_32_64(ext16u): return (uint16_t)x; + case INDEX_op_ext_i32_i64: case INDEX_op_ext32s_i64: return (int32_t)x; + case INDEX_op_extu_i32_i64: case INDEX_op_ext32u_i64: return (uint32_t)x; @@ -837,6 +839,15 @@ void tcg_optimize(TCGContext *s) mask = temps[args[1]].mask & mask; break; + case INDEX_op_ext_i32_i64: + if ((temps[args[1]].mask & 0x80000000) != 0) { + break; + } + case INDEX_op_extu_i32_i64: + /* We do not compute affected as it is a size changing op. */ + mask = (uint32_t)temps[args[1]].mask; + break; + CASE_OP_32_64(andc): /* Known-zeros does not imply known-ones. Therefore unless args[2] is constant, we can't infer anything from it. */ @@ -1015,6 +1026,8 @@ void tcg_optimize(TCGContext *s) CASE_OP_32_64(ext16u): case INDEX_op_ext32s_i64: case INDEX_op_ext32u_i64: + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); tcg_opt_gen_movi(s, op, args, args[0], tmp); From 870ad1547ac53bc79c21d86cf453b3b20cc660a2 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 27 Jul 2015 12:41:45 +0200 Subject: [PATCH 10/18] tcg: update README about size changing ops Signed-off-by: Aurelien Jarno Signed-off-by: Richard Henderson --- tcg/README | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tcg/README b/tcg/README index 61b3899622..a22f25145c 100644 --- a/tcg/README +++ b/tcg/README @@ -466,13 +466,25 @@ On a 32 bit target, all 64 bit operations are converted to 32 bits. A few specific operations must be implemented to allow it (see add2_i32, sub2_i32, brcond2_i32). 
+On a 64 bit target, the values are transferred between 32 and 64-bit +registers using the following ops: +- trunc_shr_i64_i32 +- ext_i32_i64 +- extu_i32_i64 + +They ensure that the values are correctly truncated or extended when +moved from a 32-bit to a 64-bit register or vice-versa. Note that the +trunc_shr_i64_i32 is an optional op. It is not necessary to implement +it if all the following conditions are met: +- 64-bit registers can hold 32-bit values +- 32-bit values in a 64-bit register do not need to stay zero or + sign extended +- all 32-bit TCG ops ignore the high part of 64-bit registers + Floating point operations are not supported in this version. A previous incarnation of the code generator had full support of them, but it is better to concentrate on integer operations first. -On a 64 bit target, no assumption is made in TCG about the storage of -the 32 bit values in 64 bit registers. - 4.2) Constraints GCC like constraints are used to define the constraints of every From 609ad70562793937257c89d07bf7c1370b9fc9aa Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 24 Jul 2015 07:16:00 -0700 Subject: [PATCH 11/18] tcg: Split trunc_shr_i32 opcode into extr[lh]_i64_i32 Rather than allow arbitrary shift+trunc, only concern ourselves with low and high parts. This is all that was being used anyway.
Signed-off-by: Richard Henderson --- target-tricore/translate.c | 12 ++++++------ tcg/README | 14 ++++++++++---- tcg/aarch64/tcg-target.h | 3 ++- tcg/i386/tcg-target.h | 3 ++- tcg/ia64/tcg-target.h | 3 ++- tcg/optimize.c | 22 +++++++++++----------- tcg/ppc/tcg-target.h | 3 ++- tcg/s390/tcg-target.h | 3 ++- tcg/sparc/tcg-target.c | 14 +++++++------- tcg/sparc/tcg-target.h | 3 ++- tcg/tcg-op.c | 38 +++++++++++++++++++------------------- tcg/tcg-op.h | 5 +++-- tcg/tcg-opc.h | 7 +++++-- tcg/tcg.h | 3 ++- tcg/tci/tcg-target.h | 3 ++- 15 files changed, 77 insertions(+), 59 deletions(-) diff --git a/target-tricore/translate.c b/target-tricore/translate.c index 7dc7a325b4..70f09300ee 100644 --- a/target-tricore/translate.c +++ b/target-tricore/translate.c @@ -457,11 +457,11 @@ gen_add64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) tcg_gen_xor_i64(t1, result, r1); tcg_gen_xor_i64(t0, r1, r2); tcg_gen_andc_i64(t1, t1, t0); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t1, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t1); /* calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* calc AV/SAV bits */ - tcg_gen_trunc_shr_i64_i32(temp, result, 32); + tcg_gen_extrh_i64_i32(temp, result); tcg_gen_add_tl(cpu_PSW_AV, temp, temp); tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV); /* calc SAV */ @@ -1273,7 +1273,7 @@ gen_madd64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_xor_i64(t3, t4, t1); tcg_gen_xor_i64(t2, t1, t2); tcg_gen_andc_i64(t3, t3, t2); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t3, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t3); /* We produce an overflow on the host if the mul before was (0x80000000 * 0x80000000) << 1). If this is the case, we negate the ovf. 
*/ @@ -1630,11 +1630,11 @@ gen_sub64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2) tcg_gen_xor_i64(t1, result, r1); tcg_gen_xor_i64(t0, r1, r2); tcg_gen_and_i64(t1, t1, t0); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t1, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t1); /* calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); /* calc AV/SAV bits */ - tcg_gen_trunc_shr_i64_i32(temp, result, 32); + tcg_gen_extrh_i64_i32(temp, result); tcg_gen_add_tl(cpu_PSW_AV, temp, temp); tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV); /* calc SAV */ @@ -2126,7 +2126,7 @@ gen_msub64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2, tcg_gen_xor_i64(t3, t4, t1); tcg_gen_xor_i64(t2, t1, t2); tcg_gen_and_i64(t3, t3, t2); - tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t3, 32); + tcg_gen_extrh_i64_i32(cpu_PSW_V, t3); /* We produce an overflow on the host if the mul before was (0x80000000 * 0x80000000) << 1). If this is the case, we negate the ovf. */ diff --git a/tcg/README b/tcg/README index a22f25145c..34c0775cff 100644 --- a/tcg/README +++ b/tcg/README @@ -314,11 +314,17 @@ This operation would be equivalent to dest = (t1 & ~0x0f00) | ((t2 << 8) & 0x0f00) -* trunc_shr_i64_i32 t0, t1, pos +* extrl_i64_i32 t0, t1 -For 64-bit hosts only, right shift the 64-bit input T1 by POS and -truncate to 32-bit output T0. Depending on the host, this may be -a simple mov/shift, or may require additional canonicalization. +For 64-bit hosts only, extract the low 32-bits of input T1 and place it +into 32-bit output T0. Depending on the host, this may be a simple move, +or may require additional canonicalization. + +* extrh_i64_i32 t0, t1 + +For 64-bit hosts only, extract the high 32-bits of input T1 and place it +into 32-bit output T0. Depending on the host, this may be a simple shift, +or may require additional canonicalization. 
********* Conditional moves diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index dfd880131b..19a04a6e75 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -70,7 +70,8 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index dae50ba368..92be341713 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -102,7 +102,8 @@ extern bool have_bmi1; #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 #define TCG_TARGET_HAS_ext8s_i64 1 diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index 29902f987a..ae9b79f02f 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -160,7 +160,8 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 0 #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_mulsh_i64 0 -#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_deposit_i32_valid(ofs, len) ((len) <= 16) #define TCG_TARGET_deposit_i64_valid(ofs, len) ((len) <= 16) diff --git a/tcg/optimize.c b/tcg/optimize.c index 18046051d3..10795ec9d5 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -288,7 +288,6 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) case INDEX_op_shr_i32: return (uint32_t)x >> (y & 31); - case INDEX_op_trunc_shr_i64_i32: case INDEX_op_shr_i64: return (uint64_t)x >> (y & 63); @@ -348,9 +347,13 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) return 
(int32_t)x; case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: case INDEX_op_ext32u_i64: return (uint32_t)x; + case INDEX_op_extrh_i64_i32: + return (uint64_t)x >> 32; + case INDEX_op_muluh_i32: return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; case INDEX_op_mulsh_i32: @@ -885,8 +888,11 @@ void tcg_optimize(TCGContext *s) } break; - case INDEX_op_trunc_shr_i64_i32: - mask = (uint64_t)temps[args[1]].mask >> args[2]; + case INDEX_op_extrl_i64_i32: + mask = (uint32_t)temps[args[1]].mask; + break; + case INDEX_op_extrh_i64_i32: + mask = (uint64_t)temps[args[1]].mask >> 32; break; CASE_OP_32_64(shl): @@ -1028,6 +1034,8 @@ void tcg_optimize(TCGContext *s) case INDEX_op_ext32u_i64: case INDEX_op_ext_i32_i64: case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: if (temp_is_const(args[1])) { tmp = do_constant_folding(opc, temps[args[1]].val, 0); tcg_opt_gen_movi(s, op, args, args[0], tmp); @@ -1035,14 +1043,6 @@ void tcg_optimize(TCGContext *s) } goto do_default; - case INDEX_op_trunc_shr_i64_i32: - if (temp_is_const(args[1])) { - tmp = do_constant_folding(opc, temps[args[1]].val, args[2]); - tcg_opt_gen_movi(s, op, args, args[0], tmp); - break; - } - goto do_default; - CASE_OP_32_64(add): CASE_OP_32_64(sub): CASE_OP_32_64(mul): diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b7e6861b79..b4f0818762 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -77,7 +77,8 @@ typedef enum { #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 #define TCG_TARGET_HAS_sub2_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 50016a874a..d9dc038733 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -72,7 +72,8 @@ typedef enum TCGReg { #define 
TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div2_i64 1 #define TCG_TARGET_HAS_rot_i64 1 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index fe75af08dc..87f9bcca4b 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -1415,12 +1415,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_ext32u_i64: tcg_out_arithi(s, a0, a1, 0, SHIFT_SRL); break; - case INDEX_op_trunc_shr_i64_i32: - if (a2 == 0) { - tcg_out_mov(s, TCG_TYPE_I32, a0, a1); - } else { - tcg_out_arithi(s, a0, a1, a2, SHIFT_SRLX); - } + case INDEX_op_extrl_i64_i32: + tcg_out_mov(s, TCG_TYPE_I32, a0, a1); + break; + case INDEX_op_extrh_i64_i32: + tcg_out_arithi(s, a0, a1, 32, SHIFT_SRLX); break; case INDEX_op_brcond_i64: @@ -1537,7 +1536,8 @@ static const TCGTargetOpDef sparc_op_defs[] = { { INDEX_op_ext32u_i64, { "R", "R" } }, { INDEX_op_ext_i32_i64, { "R", "r" } }, { INDEX_op_extu_i32_i64, { "R", "r" } }, - { INDEX_op_trunc_shr_i64_i32, { "r", "R" } }, + { INDEX_op_extrl_i64_i32, { "r", "R" } }, + { INDEX_op_extrh_i64_i32, { "r", "R" } }, { INDEX_op_brcond_i64, { "RZ", "RJ" } }, { INDEX_op_setcond_i64, { "R", "RZ", "RJ" } }, diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 336c47fbd2..2cd72d2d41 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -118,7 +118,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_trunc_shr_i64_i32 1 +#define TCG_TARGET_HAS_extrl_i64_i32 1 +#define TCG_TARGET_HAS_extrh_i64_i32 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_rot_i64 0 diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 711431567e..0b9dd8ff9f 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1737,28 +1737,28 @@ void 
tcg_gen_muls2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) /* Size changing operations. */ -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned count) +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { - tcg_debug_assert(count < 64); if (TCG_TARGET_REG_BITS == 32) { - if (count >= 32) { - tcg_gen_shri_i32(ret, TCGV_HIGH(arg), count - 32); - } else if (count == 0) { - tcg_gen_mov_i32(ret, TCGV_LOW(arg)); - } else { - TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); - tcg_gen_mov_i32(ret, TCGV_LOW(t)); - tcg_temp_free_i64(t); - } - } else if (TCG_TARGET_HAS_trunc_shr_i64_i32) { - tcg_gen_op3(&tcg_ctx, INDEX_op_trunc_shr_i64_i32, - GET_TCGV_I32(ret), GET_TCGV_I64(arg), count); - } else if (count == 0) { + tcg_gen_mov_i32(ret, TCGV_LOW(arg)); + } else if (TCG_TARGET_HAS_extrl_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrl_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); + } else { tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(arg))); + } +} + +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg) +{ + if (TCG_TARGET_REG_BITS == 32) { + tcg_gen_mov_i32(ret, TCGV_HIGH(arg)); + } else if (TCG_TARGET_HAS_extrh_i64_i32) { + tcg_gen_op2(&tcg_ctx, INDEX_op_extrh_i64_i32, + GET_TCGV_I32(ret), GET_TCGV_I64(arg)); } else { TCGv_i64 t = tcg_temp_new_i64(); - tcg_gen_shri_i64(t, arg, count); + tcg_gen_shri_i64(t, arg, 32); tcg_gen_mov_i32(ret, MAKE_TCGV_I32(GET_TCGV_I64(t))); tcg_temp_free_i64(t); } @@ -1818,8 +1818,8 @@ void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg) tcg_gen_mov_i32(lo, TCGV_LOW(arg)); tcg_gen_mov_i32(hi, TCGV_HIGH(arg)); } else { - tcg_gen_trunc_shr_i64_i32(lo, arg, 0); - tcg_gen_trunc_shr_i64_i32(hi, arg, 32); + tcg_gen_extrl_i64_i32(lo, arg); + tcg_gen_extrh_i64_i32(hi, arg); } } diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index d1d763f6ff..6b59eedf74 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -684,7 +684,8 @@ static inline void tcg_gen_neg_i64(TCGv_i64 ret, TCGv_i64 arg) void 
tcg_gen_extu_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void tcg_gen_ext_i32_i64(TCGv_i64 ret, TCGv_i32 arg); void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high); -void tcg_gen_trunc_shr_i64_i32(TCGv_i32 ret, TCGv_i64 arg, unsigned int c); +void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg); +void tcg_gen_extrh_i64_i32(TCGv_i32 ret, TCGv_i64 arg); void tcg_gen_extr_i64_i32(TCGv_i32 lo, TCGv_i32 hi, TCGv_i64 arg); void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg); @@ -695,7 +696,7 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) { - tcg_gen_trunc_shr_i64_i32(ret, arg, 0); + tcg_gen_extrl_i64_i32(ret, arg); } /* QEMU specific operations. */ diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index f721a5a007..02bbf30387 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -141,8 +141,11 @@ DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) /* size changing ops */ DEF(ext_i32_i64, 1, 1, 0, IMPL64) DEF(extu_i32_i64, 1, 1, 0, IMPL64) -DEF(trunc_shr_i64_i32, 1, 1, 1, - IMPL(TCG_TARGET_HAS_trunc_shr_i64_i32) +DEF(extrl_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrl_i64_i32) + | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) +DEF(extrh_i64_i32, 1, 1, 0, + IMPL(TCG_TARGET_HAS_extrh_i64_i32) | (TCG_TARGET_REG_BITS == 32 ? TCG_OPF_NOT_PRESENT : 0)) DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) diff --git a/tcg/tcg.h b/tcg/tcg.h index e7e33b9bb8..f437824ba9 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -66,7 +66,8 @@ typedef uint64_t TCGRegSet; #if TCG_TARGET_REG_BITS == 32 /* Turn some undef macros into false macros. 
*/ -#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_div_i64 0 #define TCG_TARGET_HAS_rem_i64 0 #define TCG_TARGET_HAS_div2_i64 0 diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 8b1139b3f2..77e5952781 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -84,7 +84,8 @@ #define TCG_TARGET_HAS_mulsh_i32 0 #if TCG_TARGET_REG_BITS == 64 -#define TCG_TARGET_HAS_trunc_shr_i64_i32 0 +#define TCG_TARGET_HAS_extrl_i64_i32 0 +#define TCG_TARGET_HAS_extrh_i64_i32 0 #define TCG_TARGET_HAS_bswap16_i64 1 #define TCG_TARGET_HAS_bswap32_i64 1 #define TCG_TARGET_HAS_bswap64_i64 1 From ecc7b3aa71f5fdcf9ee87e74ca811d988282641d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 24 Jul 2015 11:49:53 -0700 Subject: [PATCH 12/18] tcg: Remove tcg_gen_trunc_i64_i32 Replacing it with tcg_gen_extrl_i64_i32. Signed-off-by: Richard Henderson --- target-alpha/translate.c | 4 +-- target-arm/translate-a64.c | 60 +++++++++++++++++------------------ target-arm/translate.c | 46 +++++++++++++-------------- target-cris/translate.c | 4 +-- target-m68k/translate.c | 2 +- target-microblaze/translate.c | 8 ++--- target-mips/translate.c | 4 +-- target-openrisc/translate.c | 22 ++++++------- target-s390x/translate.c | 30 +++++++++--------- target-sh4/translate.c | 4 +-- target-sparc/translate.c | 14 ++++---- target-tricore/translate.c | 20 ++++++------ target-xtensa/translate.c | 2 +- tcg/tcg-op.h | 9 ++---- 14 files changed, 112 insertions(+), 117 deletions(-) diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 2849ede85c..1fd9c3be96 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -2007,7 +2007,7 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) REQUIRE_REG_31(rb); t32 = tcg_temp_new_i32(); va = load_gpr(ctx, ra); - tcg_gen_trunc_i64_i32(t32, va); + tcg_gen_extrl_i64_i32(t32, va); gen_helper_memory_to_s(vc, t32); 
tcg_temp_free_i32(t32); break; @@ -2027,7 +2027,7 @@ static ExitStatus translate_one(DisasContext *ctx, uint32_t insn) REQUIRE_REG_31(rb); t32 = tcg_temp_new_i32(); va = load_gpr(ctx, ra); - tcg_gen_trunc_i64_i32(t32, va); + tcg_gen_extrl_i64_i32(t32, va); gen_helper_memory_to_f(vc, t32); tcg_temp_free_i32(t32); break; diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 689f2be896..5c13e153d4 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -528,9 +528,9 @@ static inline void gen_set_NZ64(TCGv_i64 result) TCGv_i64 flag = tcg_temp_new_i64(); tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0); - tcg_gen_trunc_i64_i32(cpu_ZF, flag); + tcg_gen_extrl_i64_i32(cpu_ZF, flag); tcg_gen_shri_i64(flag, result, 32); - tcg_gen_trunc_i64_i32(cpu_NF, flag); + tcg_gen_extrl_i64_i32(cpu_NF, flag); tcg_temp_free_i64(flag); } @@ -540,8 +540,8 @@ static inline void gen_logic_CC(int sf, TCGv_i64 result) if (sf) { gen_set_NZ64(result); } else { - tcg_gen_trunc_i64_i32(cpu_ZF, result); - tcg_gen_trunc_i64_i32(cpu_NF, result); + tcg_gen_extrl_i64_i32(cpu_ZF, result); + tcg_gen_extrl_i64_i32(cpu_NF, result); } tcg_gen_movi_i32(cpu_CF, 0); tcg_gen_movi_i32(cpu_VF, 0); @@ -559,7 +559,7 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_movi_i64(tmp, 0); tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); - tcg_gen_trunc_i64_i32(cpu_CF, flag); + tcg_gen_extrl_i64_i32(cpu_CF, flag); gen_set_NZ64(result); @@ -568,7 +568,7 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_andc_i64(flag, flag, tmp); tcg_temp_free_i64(tmp); tcg_gen_shri_i64(flag, flag, 32); - tcg_gen_trunc_i64_i32(cpu_VF, flag); + tcg_gen_extrl_i64_i32(cpu_VF, flag); tcg_gen_mov_i64(dest, result); tcg_temp_free_i64(result); @@ -580,8 +580,8 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i32 tmp = tcg_temp_new_i32(); tcg_gen_movi_i32(tmp, 0); - tcg_gen_trunc_i64_i32(t0_32, t0); - 
tcg_gen_trunc_i64_i32(t1_32, t1); + tcg_gen_extrl_i64_i32(t0_32, t0); + tcg_gen_extrl_i64_i32(t1_32, t1); tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32); @@ -609,7 +609,7 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) gen_set_NZ64(result); tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1); - tcg_gen_trunc_i64_i32(cpu_CF, flag); + tcg_gen_extrl_i64_i32(cpu_CF, flag); tcg_gen_xor_i64(flag, result, t0); tmp = tcg_temp_new_i64(); @@ -617,7 +617,7 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_and_i64(flag, flag, tmp); tcg_temp_free_i64(tmp); tcg_gen_shri_i64(flag, flag, 32); - tcg_gen_trunc_i64_i32(cpu_VF, flag); + tcg_gen_extrl_i64_i32(cpu_VF, flag); tcg_gen_mov_i64(dest, result); tcg_temp_free_i64(flag); tcg_temp_free_i64(result); @@ -627,8 +627,8 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) TCGv_i32 t1_32 = tcg_temp_new_i32(); TCGv_i32 tmp; - tcg_gen_trunc_i64_i32(t0_32, t0); - tcg_gen_trunc_i64_i32(t1_32, t1); + tcg_gen_extrl_i64_i32(t0_32, t0); + tcg_gen_extrl_i64_i32(t1_32, t1); tcg_gen_sub_i32(cpu_NF, t0_32, t1_32); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32); @@ -670,14 +670,14 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_extu_i32_i64(cf_64, cpu_CF); tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp); tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp); - tcg_gen_trunc_i64_i32(cpu_CF, cf_64); + tcg_gen_extrl_i64_i32(cpu_CF, cf_64); gen_set_NZ64(result); tcg_gen_xor_i64(vf_64, result, t0); tcg_gen_xor_i64(tmp, t0, t1); tcg_gen_andc_i64(vf_64, vf_64, tmp); tcg_gen_shri_i64(vf_64, vf_64, 32); - tcg_gen_trunc_i64_i32(cpu_VF, vf_64); + tcg_gen_extrl_i64_i32(cpu_VF, vf_64); tcg_gen_mov_i64(dest, result); @@ -691,8 +691,8 @@ static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) t1_32 = 
tcg_temp_new_i32(); tmp = tcg_const_i32(0); - tcg_gen_trunc_i64_i32(t0_32, t0); - tcg_gen_trunc_i64_i32(t1_32, t1); + tcg_gen_extrl_i64_i32(t0_32, t0); + tcg_gen_extrl_i64_i32(t1_32, t1); tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp); tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp); @@ -1301,7 +1301,7 @@ static void gen_set_nzcv(TCGv_i64 tcg_rt) TCGv_i32 nzcv = tcg_temp_new_i32(); /* take NZCV from R[t] */ - tcg_gen_trunc_i64_i32(nzcv, tcg_rt); + tcg_gen_extrl_i64_i32(nzcv, tcg_rt); /* bit 31, N */ tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31)); @@ -3131,8 +3131,8 @@ static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf, TCGv_i32 t0, t1; t0 = tcg_temp_new_i32(); t1 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(t0, src); - tcg_gen_trunc_i64_i32(t1, shift_amount); + tcg_gen_extrl_i64_i32(t0, src); + tcg_gen_extrl_i64_i32(t1, shift_amount); tcg_gen_rotr_i32(t0, t0, t1); tcg_gen_extu_i32_i64(dst, t0); tcg_temp_free_i32(t0); @@ -3680,7 +3680,7 @@ static void handle_clz(DisasContext *s, unsigned int sf, gen_helper_clz64(tcg_rd, tcg_rn); } else { TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn); + tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); gen_helper_clz(tcg_tmp32, tcg_tmp32); tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); tcg_temp_free_i32(tcg_tmp32); @@ -3698,7 +3698,7 @@ static void handle_cls(DisasContext *s, unsigned int sf, gen_helper_cls64(tcg_rd, tcg_rn); } else { TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn); + tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); gen_helper_cls32(tcg_tmp32, tcg_tmp32); tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); tcg_temp_free_i32(tcg_tmp32); @@ -3716,7 +3716,7 @@ static void handle_rbit(DisasContext *s, unsigned int sf, gen_helper_rbit64(tcg_rd, tcg_rn); } else { TCGv_i32 tcg_tmp32 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tcg_tmp32, tcg_rn); + tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn); gen_helper_rbit(tcg_tmp32, tcg_tmp32); 
tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32); tcg_temp_free_i32(tcg_tmp32); @@ -5475,16 +5475,16 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn) assert(elements == 4); read_vec_element(s, tcg_elt, rn, 0, MO_32); - tcg_gen_trunc_i64_i32(tcg_elt1, tcg_elt); + tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt); read_vec_element(s, tcg_elt, rn, 1, MO_32); - tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt); do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst); read_vec_element(s, tcg_elt, rn, 2, MO_32); - tcg_gen_trunc_i64_i32(tcg_elt2, tcg_elt); + tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt); read_vec_element(s, tcg_elt, rn, 3, MO_32); - tcg_gen_trunc_i64_i32(tcg_elt3, tcg_elt); + tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt); do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst); @@ -7647,7 +7647,7 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, static NeonGenNarrowFn * const xtnfns[3] = { gen_helper_neon_narrow_u8, gen_helper_neon_narrow_u16, - tcg_gen_trunc_i64_i32, + tcg_gen_extrl_i64_i32, }; static NeonGenNarrowEnvFn * const sqxtunfns[3] = { gen_helper_neon_unarrow_sat8, @@ -7681,10 +7681,10 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar, } else { TCGv_i32 tcg_lo = tcg_temp_new_i32(); TCGv_i32 tcg_hi = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tcg_lo, tcg_op); + tcg_gen_extrl_i64_i32(tcg_lo, tcg_op); gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env); tcg_gen_shri_i64(tcg_op, tcg_op, 32); - tcg_gen_trunc_i64_i32(tcg_hi, tcg_op); + tcg_gen_extrl_i64_i32(tcg_hi, tcg_op); gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env); tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16); tcg_temp_free_i32(tcg_lo); @@ -8593,7 +8593,7 @@ static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size, static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in) { tcg_gen_shri_i64(in, in, 32); - tcg_gen_trunc_i64_i32(res, in); + tcg_gen_extrl_i64_i32(res, in); } static void 
do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in) diff --git a/target-arm/translate.c b/target-arm/translate.c index 69ac18c108..e27634f3c8 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -1557,7 +1557,7 @@ static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest) } else { tmp = tcg_temp_new_i32(); iwmmxt_load_reg(cpu_V0, rd); - tcg_gen_trunc_i64_i32(tmp, cpu_V0); + tcg_gen_extrl_i64_i32(tmp, cpu_V0); } tcg_gen_andi_i32(tmp, tmp, mask); tcg_gen_mov_i32(dest, tmp); @@ -1581,9 +1581,9 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) rdhi = (insn >> 16) & 0xf; if (insn & ARM_CP_RW_BIT) { /* TMRRC */ iwmmxt_load_reg(cpu_V0, wrd); - tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0); + tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0); tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); - tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0); + tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0); } else { /* TMCRR */ tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); iwmmxt_store_reg(cpu_V0, wrd); @@ -1638,15 +1638,15 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) if (insn & (1 << 22)) { /* WSTRD */ gen_aa32_st64(cpu_M0, addr, get_mem_index(s)); } else { /* WSTRW wRd */ - tcg_gen_trunc_i64_i32(tmp, cpu_M0); + tcg_gen_extrl_i64_i32(tmp, cpu_M0); gen_aa32_st32(tmp, addr, get_mem_index(s)); } } else { if (insn & (1 << 22)) { /* WSTRH */ - tcg_gen_trunc_i64_i32(tmp, cpu_M0); + tcg_gen_extrl_i64_i32(tmp, cpu_M0); gen_aa32_st16(tmp, addr, get_mem_index(s)); } else { /* WSTRB */ - tcg_gen_trunc_i64_i32(tmp, cpu_M0); + tcg_gen_extrl_i64_i32(tmp, cpu_M0); gen_aa32_st8(tmp, addr, get_mem_index(s)); } } @@ -1946,7 +1946,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) switch ((insn >> 22) & 3) { case 0: tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3); - tcg_gen_trunc_i64_i32(tmp, cpu_M0); + tcg_gen_extrl_i64_i32(tmp, cpu_M0); if (insn & 8) { tcg_gen_ext8s_i32(tmp, tmp); } else { @@ -1955,7 +1955,7 @@ static int 
disas_iwmmxt_insn(DisasContext *s, uint32_t insn) break; case 1: tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4); - tcg_gen_trunc_i64_i32(tmp, cpu_M0); + tcg_gen_extrl_i64_i32(tmp, cpu_M0); if (insn & 8) { tcg_gen_ext16s_i32(tmp, tmp); } else { @@ -1964,7 +1964,7 @@ static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn) break; case 2: tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5); - tcg_gen_trunc_i64_i32(tmp, cpu_M0); + tcg_gen_extrl_i64_i32(tmp, cpu_M0); break; } store_reg(s, rd, tmp); @@ -2627,9 +2627,9 @@ static int disas_dsp_insn(DisasContext *s, uint32_t insn) if (insn & ARM_CP_RW_BIT) { /* MRA */ iwmmxt_load_reg(cpu_V0, acc); - tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0); + tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0); tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); - tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0); + tcg_gen_extrl_i64_i32(cpu_R[rdhi], cpu_V0); tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1); } else { /* MAR */ tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]); @@ -2951,7 +2951,7 @@ static int handle_vcvt(uint32_t insn, uint32_t rd, uint32_t rm, uint32_t dp, } else { gen_helper_vfp_tould(tcg_res, tcg_double, tcg_shift, fpst); } - tcg_gen_trunc_i64_i32(tcg_tmp, tcg_res); + tcg_gen_extrl_i64_i32(tcg_tmp, tcg_res); tcg_gen_st_f32(tcg_tmp, cpu_env, vfp_reg_offset(0, rd)); tcg_temp_free_i32(tcg_tmp); tcg_temp_free_i64(tcg_res); @@ -4683,7 +4683,7 @@ static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src) switch (size) { case 0: gen_helper_neon_narrow_u8(dest, src); break; case 1: gen_helper_neon_narrow_u16(dest, src); break; - case 2: tcg_gen_trunc_i64_i32(dest, src); break; + case 2: tcg_gen_extrl_i64_i32(dest, src); break; default: abort(); } } @@ -6254,7 +6254,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; case 2: tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); - tcg_gen_trunc_i64_i32(tmp, cpu_V0); + tcg_gen_extrl_i64_i32(tmp, cpu_V0); break; default: abort(); } @@ -6269,7 +6269,7 @@ static 
int disas_neon_data_insn(DisasContext *s, uint32_t insn) case 2: tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31); tcg_gen_shri_i64(cpu_V0, cpu_V0, 32); - tcg_gen_trunc_i64_i32(tmp, cpu_V0); + tcg_gen_extrl_i64_i32(tmp, cpu_V0); break; default: abort(); } @@ -7224,11 +7224,11 @@ static int disas_coproc_insn(DisasContext *s, uint32_t insn) tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset); } tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_gen_extrl_i64_i32(tmp, tmp64); store_reg(s, rt, tmp); tcg_gen_shri_i64(tmp64, tmp64, 32); tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_gen_extrl_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); store_reg(s, rt2, tmp); } else { @@ -7334,11 +7334,11 @@ static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val) { TCGv_i32 tmp; tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, val); + tcg_gen_extrl_i64_i32(tmp, val); store_reg(s, rlow, tmp); tmp = tcg_temp_new_i32(); tcg_gen_shri_i64(val, val, 32); - tcg_gen_trunc_i64_i32(tmp, val); + tcg_gen_extrl_i64_i32(tmp, val); store_reg(s, rhigh, tmp); } @@ -8013,7 +8013,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) tmp64 = gen_muls_i64_i32(tmp, tmp2); tcg_gen_shri_i64(tmp64, tmp64, 16); tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_gen_extrl_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); if ((sh & 2) == 0) { tmp2 = load_reg(s, rn); @@ -8679,7 +8679,7 @@ static void disas_arm_insn(DisasContext *s, unsigned int insn) } tcg_gen_shri_i64(tmp64, tmp64, 32); tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_gen_extrl_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); store_reg(s, rn, tmp); break; @@ -9749,7 +9749,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw tmp64 = gen_muls_i64_i32(tmp, tmp2); tcg_gen_shri_i64(tmp64, tmp64, 16); tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_gen_extrl_i64_i32(tmp, tmp64); 
tcg_temp_free_i64(tmp64); if (rs != 15) { @@ -9773,7 +9773,7 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw } tcg_gen_shri_i64(tmp64, tmp64, 32); tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, tmp64); + tcg_gen_extrl_i64_i32(tmp, tmp64); tcg_temp_free_i64(tmp64); break; case 7: /* Unsigned sum of absolute differences. */ diff --git a/target-cris/translate.c b/target-cris/translate.c index 3e59601eb4..5699826c8b 100644 --- a/target-cris/translate.c +++ b/target-cris/translate.c @@ -2604,9 +2604,9 @@ static int dec_movem_mr(CPUCRISState *env, DisasContext *dc) tcg_temp_free(addr); for (i = 0; i < (nr >> 1); i++) { - tcg_gen_trunc_i64_i32(cpu_R[i * 2], tmp[i]); + tcg_gen_extrl_i64_i32(cpu_R[i * 2], tmp[i]); tcg_gen_shri_i64(tmp[i], tmp[i], 32); - tcg_gen_trunc_i64_i32(cpu_R[i * 2 + 1], tmp[i]); + tcg_gen_extrl_i64_i32(cpu_R[i * 2 + 1], tmp[i]); tcg_temp_free_i64(tmp[i]); } if (nr & 1) { diff --git a/target-m68k/translate.c b/target-m68k/translate.c index a57d2415c9..3cdf6652aa 100644 --- a/target-m68k/translate.c +++ b/target-m68k/translate.c @@ -2680,7 +2680,7 @@ DISAS_INSN(from_mac) if (s->env->macsr & MACSR_FI) { gen_helper_get_macf(rx, cpu_env, acc); } else if ((s->env->macsr & MACSR_OMC) == 0) { - tcg_gen_trunc_i64_i32(rx, acc); + tcg_gen_extrl_i64_i32(rx, acc); } else if (s->env->macsr & MACSR_SU) { gen_helper_get_macs(rx, acc); } else { diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c index f4e969b29c..47ac18015e 100644 --- a/target-microblaze/translate.c +++ b/target-microblaze/translate.c @@ -598,9 +598,9 @@ static void t_gen_muls(TCGv d, TCGv d2, TCGv a, TCGv b) tcg_gen_ext_i32_i64(t1, b); tcg_gen_mul_i64(t0, t0, t1); - tcg_gen_trunc_i64_i32(d, t0); + tcg_gen_extrl_i64_i32(d, t0); tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_trunc_i64_i32(d2, t0); + tcg_gen_extrl_i64_i32(d2, t0); tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); @@ -618,9 +618,9 @@ static void t_gen_mulu(TCGv d, TCGv d2, TCGv a, 
TCGv b) tcg_gen_extu_i32_i64(t1, b); tcg_gen_mul_i64(t0, t0, t1); - tcg_gen_trunc_i64_i32(d, t0); + tcg_gen_extrl_i64_i32(d, t0); tcg_gen_shri_i64(t0, t0, 32); - tcg_gen_trunc_i64_i32(d2, t0); + tcg_gen_extrl_i64_i32(d2, t0); tcg_temp_free_i64(t0); tcg_temp_free_i64(t1); diff --git a/target-mips/translate.c b/target-mips/translate.c index 98cf72de74..93cb4f2731 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -1629,7 +1629,7 @@ static void gen_load_fpr32(DisasContext *ctx, TCGv_i32 t, int reg) if (ctx->hflags & MIPS_HFLAG_FRE) { generate_exception(ctx, EXCP_RI); } - tcg_gen_trunc_i64_i32(t, fpu_f64[reg]); + tcg_gen_extrl_i64_i32(t, fpu_f64[reg]); } static void gen_store_fpr32(DisasContext *ctx, TCGv_i32 t, int reg) @@ -1649,7 +1649,7 @@ static void gen_load_fpr32h(DisasContext *ctx, TCGv_i32 t, int reg) if (ctx->hflags & MIPS_HFLAG_F64) { TCGv_i64 t64 = tcg_temp_new_i64(); tcg_gen_shri_i64(t64, fpu_f64[reg], 32); - tcg_gen_trunc_i64_i32(t, t64); + tcg_gen_extrl_i64_i32(t, t64); tcg_temp_free_i64(t64); } else { gen_load_fpr32(ctx, t, reg | 1); diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c index a62cbf4011..aca1242bdb 100644 --- a/target-openrisc/translate.c +++ b/target-openrisc/translate.c @@ -279,7 +279,7 @@ static void dec_calc(DisasContext *dc, uint32_t insn) tcg_gen_extu_i32_i64(ta, cpu_R[ra]); tcg_gen_extu_i32_i64(tb, cpu_R[rb]); tcg_gen_add_i64(td, ta, tb); - tcg_gen_trunc_i64_i32(res, td); + tcg_gen_extrl_i64_i32(res, td); tcg_gen_shri_i64(td, td, 31); tcg_gen_andi_i64(td, td, 0x3); /* Jump to lab when no overflow. */ @@ -324,7 +324,7 @@ static void dec_calc(DisasContext *dc, uint32_t insn) tcg_gen_shri_i64(tcy, tcy, 10); tcg_gen_add_i64(td, ta, tb); tcg_gen_add_i64(td, td, tcy); - tcg_gen_trunc_i64_i32(res, td); + tcg_gen_extrl_i64_i32(res, td); tcg_gen_shri_i64(td, td, 32); tcg_gen_andi_i64(td, td, 0x3); /* Jump to lab when no overflow. 
*/ @@ -366,7 +366,7 @@ static void dec_calc(DisasContext *dc, uint32_t insn) tcg_gen_extu_i32_i64(ta, cpu_R[ra]); tcg_gen_extu_i32_i64(tb, cpu_R[rb]); tcg_gen_sub_i64(td, ta, tb); - tcg_gen_trunc_i64_i32(res, td); + tcg_gen_extrl_i64_i32(res, td); tcg_gen_shri_i64(td, td, 31); tcg_gen_andi_i64(td, td, 0x3); /* Jump to lab when no overflow. */ @@ -779,9 +779,9 @@ static void dec_misc(DisasContext *dc, uint32_t insn) tcg_gen_ext_i32_i64(t1, dst); tcg_gen_concat_i32_i64(t2, maclo, machi); tcg_gen_add_i64(t2, t2, t1); - tcg_gen_trunc_i64_i32(maclo, t2); + tcg_gen_extrl_i64_i32(maclo, t2); tcg_gen_shri_i64(t2, t2, 32); - tcg_gen_trunc_i64_i32(machi, t2); + tcg_gen_extrl_i64_i32(machi, t2); tcg_temp_free_i32(dst); tcg_temp_free(ttmp); tcg_temp_free_i64(t1); @@ -898,7 +898,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn) TCGv_i32 sr_ove = tcg_temp_local_new_i32(); tcg_gen_extu_i32_i64(ta, cpu_R[ra]); tcg_gen_addi_i64(td, ta, sign_extend(I16, 16)); - tcg_gen_trunc_i64_i32(res, td); + tcg_gen_extrl_i64_i32(res, td); tcg_gen_shri_i64(td, td, 32); tcg_gen_andi_i64(td, td, 0x3); /* Jump to lab when no overflow. */ @@ -934,7 +934,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn) tcg_gen_extu_i32_i64(tcy, sr_cy); tcg_gen_addi_i64(td, ta, sign_extend(I16, 16)); tcg_gen_add_i64(td, td, tcy); - tcg_gen_trunc_i64_i32(res, td); + tcg_gen_extrl_i64_i32(res, td); tcg_gen_shri_i64(td, td, 32); tcg_gen_andi_i64(td, td, 0x3); /* Jump to lab when no overflow. 
*/ @@ -1073,9 +1073,9 @@ static void dec_mac(DisasContext *dc, uint32_t insn) tcg_gen_ext_i32_i64(t1, t0); tcg_gen_concat_i32_i64(t2, maclo, machi); tcg_gen_add_i64(t2, t2, t1); - tcg_gen_trunc_i64_i32(maclo, t2); + tcg_gen_extrl_i64_i32(maclo, t2); tcg_gen_shri_i64(t2, t2, 32); - tcg_gen_trunc_i64_i32(machi, t2); + tcg_gen_extrl_i64_i32(machi, t2); tcg_temp_free_i32(t0); tcg_temp_free_i64(t1); tcg_temp_free_i64(t2); @@ -1092,9 +1092,9 @@ static void dec_mac(DisasContext *dc, uint32_t insn) tcg_gen_ext_i32_i64(t1, t0); tcg_gen_concat_i32_i64(t2, maclo, machi); tcg_gen_sub_i64(t2, t2, t1); - tcg_gen_trunc_i64_i32(maclo, t2); + tcg_gen_extrl_i64_i32(maclo, t2); tcg_gen_shri_i64(t2, t2, 32); - tcg_gen_trunc_i64_i32(machi, t2); + tcg_gen_extrl_i64_i32(machi, t2); tcg_temp_free_i32(t0); tcg_temp_free_i64(t1); tcg_temp_free_i64(t2); diff --git a/target-s390x/translate.c b/target-s390x/translate.c index c748290d5c..2bca33acca 100644 --- a/target-s390x/translate.c +++ b/target-s390x/translate.c @@ -811,7 +811,7 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) case CC_OP_LTGT0_32: c->is_64 = false; c->u.s32.a = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(c->u.s32.a, cc_dst); + tcg_gen_extrl_i64_i32(c->u.s32.a, cc_dst); c->u.s32.b = tcg_const_i32(0); break; case CC_OP_LTGT_32: @@ -819,9 +819,9 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) case CC_OP_SUBU_32: c->is_64 = false; c->u.s32.a = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(c->u.s32.a, cc_src); + tcg_gen_extrl_i64_i32(c->u.s32.a, cc_src); c->u.s32.b = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(c->u.s32.b, cc_dst); + tcg_gen_extrl_i64_i32(c->u.s32.b, cc_dst); break; case CC_OP_LTGT0_64: @@ -851,11 +851,11 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask) c->is_64 = false; c->u.s32.a = tcg_temp_new_i32(); c->u.s32.b = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(c->u.s32.a, cc_vr); + tcg_gen_extrl_i64_i32(c->u.s32.a, cc_vr); if (cond == 
TCG_COND_EQ || cond == TCG_COND_NE) { tcg_gen_movi_i32(c->u.s32.b, 0); } else { - tcg_gen_trunc_i64_i32(c->u.s32.b, cc_src); + tcg_gen_extrl_i64_i32(c->u.s32.b, cc_src); } break; @@ -1532,7 +1532,7 @@ static ExitStatus op_bct32(DisasContext *s, DisasOps *o) store_reg32_i64(r1, t); c.u.s32.a = tcg_temp_new_i32(); c.u.s32.b = tcg_const_i32(0); - tcg_gen_trunc_i64_i32(c.u.s32.a, t); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); tcg_temp_free_i64(t); return help_branch(s, &c, is_imm, imm, o->in2); @@ -1556,7 +1556,7 @@ static ExitStatus op_bcth(DisasContext *s, DisasOps *o) store_reg32h_i64(r1, t); c.u.s32.a = tcg_temp_new_i32(); c.u.s32.b = tcg_const_i32(0); - tcg_gen_trunc_i64_i32(c.u.s32.a, t); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); tcg_temp_free_i64(t); return help_branch(s, &c, 1, imm, o->in2); @@ -1599,8 +1599,8 @@ static ExitStatus op_bx32(DisasContext *s, DisasOps *o) tcg_gen_add_i64(t, regs[r1], regs[r3]); c.u.s32.a = tcg_temp_new_i32(); c.u.s32.b = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(c.u.s32.a, t); - tcg_gen_trunc_i64_i32(c.u.s32.b, regs[r3 | 1]); + tcg_gen_extrl_i64_i32(c.u.s32.a, t); + tcg_gen_extrl_i64_i32(c.u.s32.b, regs[r3 | 1]); store_reg32_i64(r1, t); tcg_temp_free_i64(t); @@ -1905,7 +1905,7 @@ static ExitStatus op_clm(DisasContext *s, DisasOps *o) { TCGv_i32 m3 = tcg_const_i32(get_field(s->fields, m3)); TCGv_i32 t1 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(t1, o->in1); + tcg_gen_extrl_i64_i32(t1, o->in1); potential_page_fault(s); gen_helper_clm(cc_op, cpu_env, t1, m3, o->in2); set_cc_static(s); @@ -1977,7 +1977,7 @@ static ExitStatus op_cs(DisasContext *s, DisasOps *o) /* Store CC back to cc_op. Wait until after the store so that any exception gets the old cc_op value. */ - tcg_gen_trunc_i64_i32(cc_op, cc); + tcg_gen_extrl_i64_i32(cc_op, cc); tcg_temp_free_i64(cc); set_cc_static(s); return NO_EXIT; @@ -2027,7 +2027,7 @@ static ExitStatus op_cdsg(DisasContext *s, DisasOps *o) /* Save back state now that we've passed all exceptions. 
*/ tcg_gen_mov_i64(regs[r1], outh); tcg_gen_mov_i64(regs[r1 + 1], outl); - tcg_gen_trunc_i64_i32(cc_op, cc); + tcg_gen_extrl_i64_i32(cc_op, cc); tcg_temp_free_i64(outh); tcg_temp_free_i64(outl); tcg_temp_free_i64(cc); @@ -2051,7 +2051,7 @@ static ExitStatus op_cvd(DisasContext *s, DisasOps *o) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i32 t2 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(t2, o->in1); + tcg_gen_extrl_i64_i32(t2, o->in1); gen_helper_cvd(t1, t2); tcg_temp_free_i32(t2); tcg_gen_qemu_st64(t1, o->in2, get_mem_index(s)); @@ -3235,8 +3235,8 @@ static ExitStatus op_rll32(DisasContext *s, DisasOps *o) TCGv_i32 t1 = tcg_temp_new_i32(); TCGv_i32 t2 = tcg_temp_new_i32(); TCGv_i32 to = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(t1, o->in1); - tcg_gen_trunc_i64_i32(t2, o->in2); + tcg_gen_extrl_i64_i32(t1, o->in1); + tcg_gen_extrl_i64_i32(t2, o->in2); tcg_gen_rotl_i32(to, t1, t2); tcg_gen_extu_i32_i64(o->out, to); tcg_temp_free_i32(t1); diff --git a/target-sh4/translate.c b/target-sh4/translate.c index 3b4a1b5cea..be0cb321cf 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -288,10 +288,10 @@ static inline void gen_load_fpr64(TCGv_i64 t, int reg) static inline void gen_store_fpr64 (TCGv_i64 t, int reg) { TCGv_i32 tmp = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(tmp, t); + tcg_gen_extrl_i64_i32(tmp, t); tcg_gen_mov_i32(cpu_fregs[reg + 1], tmp); tcg_gen_shri_i64(t, t, 32); - tcg_gen_trunc_i64_i32(tmp, t); + tcg_gen_extrl_i64_i32(tmp, t); tcg_gen_mov_i32(cpu_fregs[reg], tmp); tcg_temp_free_i32(tmp); } diff --git a/target-sparc/translate.c b/target-sparc/translate.c index c58dd4e95b..48fc2abe63 100644 --- a/target-sparc/translate.c +++ b/target-sparc/translate.c @@ -164,7 +164,7 @@ static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src) TCGv_i64 t = tcg_temp_new_i64(); tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32); - tcg_gen_trunc_i64_i32(ret, t); + tcg_gen_extrl_i64_i32(ret, t); tcg_temp_free_i64(t); return ret; @@ -379,8 +379,8 @@ static 
TCGv_i32 gen_add32_carry32(void) #if TARGET_LONG_BITS == 64 cc_src1_32 = tcg_temp_new_i32(); cc_src2_32 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_dst); - tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src); + tcg_gen_extrl_i64_i32(cc_src1_32, cpu_cc_dst); + tcg_gen_extrl_i64_i32(cc_src2_32, cpu_cc_src); #else cc_src1_32 = cpu_cc_dst; cc_src2_32 = cpu_cc_src; @@ -405,8 +405,8 @@ static TCGv_i32 gen_sub32_carry32(void) #if TARGET_LONG_BITS == 64 cc_src1_32 = tcg_temp_new_i32(); cc_src2_32 = tcg_temp_new_i32(); - tcg_gen_trunc_i64_i32(cc_src1_32, cpu_cc_src); - tcg_gen_trunc_i64_i32(cc_src2_32, cpu_cc_src2); + tcg_gen_extrl_i64_i32(cc_src1_32, cpu_cc_src); + tcg_gen_extrl_i64_i32(cc_src2_32, cpu_cc_src2); #else cc_src1_32 = cpu_cc_src; cc_src2_32 = cpu_cc_src2; @@ -2254,11 +2254,11 @@ static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs) the later. */ c32 = tcg_temp_new_i32(); if (cmp->is_bool) { - tcg_gen_trunc_i64_i32(c32, cmp->c1); + tcg_gen_extrl_i64_i32(c32, cmp->c1); } else { TCGv_i64 c64 = tcg_temp_new_i64(); tcg_gen_setcond_i64(cmp->cond, c64, cmp->c1, cmp->c2); - tcg_gen_trunc_i64_i32(c32, c64); + tcg_gen_extrl_i64_i32(c32, c64); tcg_temp_free_i64(c64); } diff --git a/target-tricore/translate.c b/target-tricore/translate.c index 70f09300ee..f02bef41ee 100644 --- a/target-tricore/translate.c +++ b/target-tricore/translate.c @@ -540,14 +540,14 @@ static inline void gen_madd32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3) tcg_gen_mul_i64(t1, t1, t3); tcg_gen_add_i64(t1, t2, t1); - tcg_gen_trunc_i64_i32(ret, t1); + tcg_gen_extrl_i64_i32(ret, t1); /* calc V t1 > 0x7fffffff */ tcg_gen_setcondi_i64(TCG_COND_GT, t3, t1, 0x7fffffffLL); /* t1 < -0x80000000 */ tcg_gen_setcondi_i64(TCG_COND_LT, t2, t1, -0x80000000LL); tcg_gen_or_i64(t2, t2, t3); - tcg_gen_trunc_i64_i32(cpu_PSW_V, t2); + tcg_gen_extrl_i64_i32(cpu_PSW_V, t2); tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); @@ 
-621,7 +621,7 @@ gen_maddu64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, /* only the add overflows, if t2 < t1 calc V bit */ tcg_gen_setcond_i64(TCG_COND_LTU, t2, t2, t1); - tcg_gen_trunc_i64_i32(cpu_PSW_V, t2); + tcg_gen_extrl_i64_i32(cpu_PSW_V, t2); tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); @@ -1110,12 +1110,12 @@ gen_madd32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, tcg_gen_sari_i64(t2, t2, up_shift); tcg_gen_add_i64(t3, t1, t2); - tcg_gen_trunc_i64_i32(temp3, t3); + tcg_gen_extrl_i64_i32(temp3, t3); /* calc v bit */ tcg_gen_setcondi_i64(TCG_COND_GT, t1, t3, 0x7fffffffLL); tcg_gen_setcondi_i64(TCG_COND_LT, t2, t3, -0x80000000LL); tcg_gen_or_i64(t1, t1, t2); - tcg_gen_trunc_i64_i32(cpu_PSW_V, t1); + tcg_gen_extrl_i64_i32(cpu_PSW_V, t1); tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); /* We produce an overflow on the host if the mul before was (0x80000000 * 0x80000000) << 1). If this is the @@ -1356,14 +1356,14 @@ static inline void gen_msub32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3) tcg_gen_mul_i64(t1, t1, t3); tcg_gen_sub_i64(t1, t2, t1); - tcg_gen_trunc_i64_i32(ret, t1); + tcg_gen_extrl_i64_i32(ret, t1); /* calc V t2 > 0x7fffffff */ tcg_gen_setcondi_i64(TCG_COND_GT, t3, t1, 0x7fffffffLL); /* result < -0x80000000 */ tcg_gen_setcondi_i64(TCG_COND_LT, t2, t1, -0x80000000LL); tcg_gen_or_i64(t2, t2, t3); - tcg_gen_trunc_i64_i32(cpu_PSW_V, t2); + tcg_gen_extrl_i64_i32(cpu_PSW_V, t2); tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ @@ -1445,7 +1445,7 @@ gen_msubu64_d(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high, tcg_gen_extr_i64_i32(ret_low, ret_high, t3); /* calc V bit, only the sub can overflow, if t1 > t2 */ tcg_gen_setcond_i64(TCG_COND_GTU, t1, t1, t2); - tcg_gen_trunc_i64_i32(cpu_PSW_V, t1); + tcg_gen_extrl_i64_i32(cpu_PSW_V, t1); tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, 
cpu_PSW_V); @@ -1973,12 +1973,12 @@ gen_msub32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n, tcg_gen_add_i64(t2, t2, t4); tcg_gen_sub_i64(t3, t1, t2); - tcg_gen_trunc_i64_i32(temp3, t3); + tcg_gen_extrl_i64_i32(temp3, t3); /* calc v bit */ tcg_gen_setcondi_i64(TCG_COND_GT, t1, t3, 0x7fffffffLL); tcg_gen_setcondi_i64(TCG_COND_LT, t2, t3, -0x80000000LL); tcg_gen_or_i64(t1, t1, t2); - tcg_gen_trunc_i64_i32(cpu_PSW_V, t1); + tcg_gen_extrl_i64_i32(cpu_PSW_V, t1); tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31); /* Calc SV bit */ tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V); diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c index f2118c24c0..a29b3e61bc 100644 --- a/target-xtensa/translate.c +++ b/target-xtensa/translate.c @@ -1544,7 +1544,7 @@ static void disas_xtensa_insn(CPUXtensaState *env, DisasContext *dc) TCGv_i64 tmp = tcg_temp_new_i64(); \ tcg_gen_extu_i32_i64(tmp, reg); \ tcg_gen_##cmd##_i64(v, v, tmp); \ - tcg_gen_trunc_i64_i32(cpu_R[RRR_R], v); \ + tcg_gen_extrl_i64_i32(cpu_R[RRR_R], v); \ tcg_temp_free_i64(v); \ tcg_temp_free_i64(tmp); \ } while (0) diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 6b59eedf74..6da083a1e9 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -694,11 +694,6 @@ static inline void tcg_gen_concat32_i64(TCGv_i64 ret, TCGv_i64 lo, TCGv_i64 hi) tcg_gen_deposit_i64(ret, lo, hi, 32, 32); } -static inline void tcg_gen_trunc_i64_i32(TCGv_i32 ret, TCGv_i64 arg) -{ - tcg_gen_extrl_i64_i32(ret, arg); -} - /* QEMU specific operations. 
*/ #ifndef TARGET_LONG_BITS @@ -854,7 +849,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_divu_tl tcg_gen_divu_i64 #define tcg_gen_remu_tl tcg_gen_remu_i64 #define tcg_gen_discard_tl tcg_gen_discard_i64 -#define tcg_gen_trunc_tl_i32 tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_tl_i32 tcg_gen_extrl_i64_i32 #define tcg_gen_trunc_i64_tl tcg_gen_mov_i64 #define tcg_gen_extu_i32_tl tcg_gen_extu_i32_i64 #define tcg_gen_ext_i32_tl tcg_gen_ext_i32_i64 @@ -933,7 +928,7 @@ static inline void tcg_gen_qemu_st64(TCGv_i64 arg, TCGv addr, int mem_index) #define tcg_gen_remu_tl tcg_gen_remu_i32 #define tcg_gen_discard_tl tcg_gen_discard_i32 #define tcg_gen_trunc_tl_i32 tcg_gen_mov_i32 -#define tcg_gen_trunc_i64_tl tcg_gen_trunc_i64_i32 +#define tcg_gen_trunc_i64_tl tcg_gen_extrl_i64_i32 #define tcg_gen_extu_i32_tl tcg_gen_mov_i32 #define tcg_gen_ext_i32_tl tcg_gen_mov_i32 #define tcg_gen_extu_tl_i64 tcg_gen_extu_i32_i64 From 8cc580f6a0d8c0e2f590c1472cf5cd8e51761760 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Thu, 9 Jul 2015 20:39:57 +0200 Subject: [PATCH 13/18] tcg/i386: use softmmu fast path for unaligned accesses Softmmu unaligned load/stores currently go through the slow path for two reasons: - to support unaligned access on hosts with strict alignment - to correctly handle accesses crossing pages x86 is only concerned by the second reason. Unaligned accesses are avoided by compilers, but are not uncommon. We therefore would like to see them going through the fast path, if they don't cross pages. For that we can use the fact that two adjacent TLB entries can't contain the same page. Therefore accessing the TLB entry corresponding to the first byte, but comparing its content to the page address of the last byte ensures that we don't cross pages.
We can do this check without adding more instructions in the TLB code (but increasing its length by one byte) by using the LEA instruction to combine the existing move with the size addition. On an x86-64 host, this gives a 3% boot time improvement for a powerpc guest and 4% for an x86-64 guest. [rth: Tidied calculation of the offset mask] Signed-off-by: Aurelien Jarno Message-Id: <1436467197-2183-1-git-send-email-aurelien@aurel32.net> Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 7648f7efd4..ff55499bb3 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1172,7 +1172,7 @@ static void * const qemu_st_helpers[16] = { First argument register is clobbered. */ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, - int mem_index, TCGMemOp s_bits, + int mem_index, TCGMemOp opc, tcg_insn_unit **label_ptr, int which) { const TCGReg r0 = TCG_REG_L0; @@ -1180,6 +1180,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, TCGType ttype = TCG_TYPE_I32; TCGType htype = TCG_TYPE_I32; int trexw = 0, hrexw = 0; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; if (TCG_TARGET_REG_BITS == 64) { if (TARGET_LONG_BITS == 64) { @@ -1193,13 +1195,19 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } tcg_out_mov(s, htype, r0, addrlo); - tcg_out_mov(s, ttype, r1, addrlo); + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. 
*/ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } tcg_out_shifti(s, SHIFT_SHR + hrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); + TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); tgen_arithi(s, ARITH_AND + hrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); @@ -1545,7 +1553,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) int mem_index; - TCGMemOp s_bits; tcg_insn_unit *label_ptr[2]; #endif @@ -1558,9 +1565,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. */ @@ -1687,7 +1693,6 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) TCGMemOp opc; #if defined(CONFIG_SOFTMMU) int mem_index; - TCGMemOp s_bits; tcg_insn_unit *label_ptr[2]; #endif @@ -1700,9 +1705,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #if defined(CONFIG_SOFTMMU) mem_index = get_mmuidx(oi); - s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc, label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. */ From 68d45bb61c5bbfb3999486f78cf026c1e79eb301 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 21 Jul 2015 15:19:38 +1000 Subject: [PATCH 14/18] tcg/ppc: Improve unaligned load/store handling on 64-bit backend Currently, we get to the slow path for any unaligned access in the backend, because we effectively preserve the bottom address bits below the alignment requirement when comparing with the TLB entry, so any non-0 bit there will cause the compare to fail. 
For the same number of instructions, we can instead add the access size - 1 to the address and stick to clearing all the bottom bits. That means that normal unaligned accesses will not fall back (the HW will handle them fine). Only when crossing a page boundary will we end up having a mismatch because we'll end up pointing to the next page which cannot possibly be in that same TLB entry. Reviewed-by: Aurelien Jarno Signed-off-by: Benjamin Herrenschmidt Message-Id: <1437455978.5809.2.camel@kernel.crashing.org> Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c | 41 +++++++++++++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 31fa25c421..1672220739 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -1361,7 +1361,7 @@ static void * const qemu_st_helpers[16] = { in CR7, loads the addend of the TLB into R3, and returns the register containing the guest address (zero-extended into R4). Clobbers R0 and R2. */ -static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, +static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc, TCGReg addrlo, TCGReg addrhi, int mem_index, bool is_read) { @@ -1371,6 +1371,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend); TCGReg base = TCG_AREG0; + TCGMemOp s_bits = opc & MO_SIZE; /* Extract the page index, shifted into place for tlb index. */ if (TCG_TARGET_REG_BITS == 64) { @@ -1422,17 +1423,37 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, to minimize any load use delay. */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off); - /* Clear the non-page, non-alignment bits from the address.
*/ + /* Clear the non-page, non-alignment bits from the address */ if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) { + /* We don't support unaligned accesses on 32-bits, preserve + * the bottom bits and thus trigger a comparison failure on + * unaligned accesses + */ tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0, (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS); - } else if (!s_bits) { - tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, - 0, 63 - TARGET_PAGE_BITS); + } else if (s_bits) { + /* > byte access, we need to handle alignment */ + if ((opc & MO_AMASK) == MO_ALIGN) { + /* Alignment required by the front-end, same as 32-bits */ + tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, + 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); + tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + } else { + /* We support unaligned accesses, we need to make sure we fail + * if we cross a page boundary. The trick is to add the + * access_size-1 to the address before masking the low bits. + * That will make the address overflow to the next page if we + * cross a page boundary which will then force a mismatch of + * the TLB compare since the next page cannot possibly be in + * the same TLB index. 
+ */ + tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1)); + tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0, + 0, 63 - TARGET_PAGE_BITS); + } } else { - tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo, - 64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - s_bits); - tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0); + /* Byte access, just chop off the bits below the page index */ + tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS); } if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { @@ -1592,7 +1613,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, true); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true); /* Load a pointer into the current opcode w/conditional branch-link. */ label_ptr = s->code_ptr; @@ -1667,7 +1688,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #ifdef CONFIG_SOFTMMU mem_index = get_mmuidx(oi); - addrlo = tcg_out_tlb_read(s, s_bits, addrlo, addrhi, mem_index, false); + addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, false); /* Load a pointer into the current opcode w/conditional branch-link. 
*/ label_ptr = s->code_ptr; From a5e39810b9088b5d20fac8e0293f281e1c8b608f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 23 Jul 2015 13:32:35 -0700 Subject: [PATCH 15/18] tcg/s390: Use softmmu fast path for unaligned accesses Signed-off-by: Richard Henderson --- tcg/s390/tcg-target.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 96c3d65d76..be51c8b668 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -1504,20 +1504,36 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1]) static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc, int mem_index, bool is_ld) { - TCGMemOp s_bits = opc & MO_SIZE; - uint64_t tlb_mask = TARGET_PAGE_MASK | ((1 << s_bits) - 1); - int ofs; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + int ofs, a_off; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. 
*/ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + a_off = 0; + tlb_mask = TARGET_PAGE_MASK | s_mask; + } else { + a_off = s_mask; + tlb_mask = TARGET_PAGE_MASK; + } if (facilities & FACILITY_GEN_INST_EXT) { tcg_out_risbg(s, TCG_REG_R2, addr_reg, 64 - CPU_TLB_BITS - CPU_TLB_ENTRY_BITS, 63 - CPU_TLB_ENTRY_BITS, 64 + CPU_TLB_ENTRY_BITS - TARGET_PAGE_BITS, 1); - tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + if (a_off) { + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); + tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); + } else { + tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask); + } } else { tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_R3, addr_reg); + tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off); tgen_andi(s, TCG_TYPE_I64, TCG_REG_R2, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS); tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask); From 9ee14902bf107e37fb2c8119fa7bca424396237c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Aug 2015 12:18:05 -0700 Subject: [PATCH 16/18] tcg/aarch64: Use softmmu fast path for unaligned accesses Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index 7f7ab7e9aa..bc3a539836 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1051,14 +1051,29 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, slow path for the failure case, which will be patched later when finalizing the slow path. Generated code returns the host addend in X1, clobbers X0,X2,X3,TMP. 
*/ -static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits, +static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc, tcg_insn_unit **label_ptr, int mem_index, bool is_read) { - TCGReg base = TCG_AREG0; int tlb_offset = is_read ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read) : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write); + int s_mask = (1 << (opc & MO_SIZE)) - 1; + TCGReg base = TCG_AREG0, x3; + uint64_t tlb_mask; + + /* For aligned accesses, we check the first byte and include the alignment + bits within the address. For unaligned access, we check that we don't + cross pages using the address of the last byte of the access. */ + if ((opc & MO_AMASK) == MO_ALIGN || s_mask == 0) { + tlb_mask = TARGET_PAGE_MASK | s_mask; + x3 = addr_reg; + } else { + tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, addr_reg, s_mask); + tlb_mask = TARGET_PAGE_MASK; + x3 = TCG_REG_X3; + } /* Extract the TLB index from the address into X0. X0 = @@ -1066,11 +1081,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits, tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg, TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS); - /* Store the page mask part of the address and the low s_bits into X3. - Later this allows checking for equality and alignment at the same time. - X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */ - tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3, - addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1)); + /* Store the page mask part of the address into X3. */ + tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, + TCG_REG_X3, x3, tlb_mask); /* Add any "high bits" from the tlb offset to the env address into X2, to take advantage of the LSL12 form of the ADDI instruction. @@ -1207,10 +1220,9 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, const TCGType otype = TARGET_LONG_BITS == 64 ? 
TCG_TYPE_I64 : TCG_TYPE_I32; #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; tcg_insn_unit *label_ptr; - tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1); + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1); tcg_out_qemu_ld_direct(s, memop, ext, data_reg, TCG_REG_X1, otype, addr_reg); add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg, @@ -1229,14 +1241,13 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32; #ifdef CONFIG_SOFTMMU unsigned mem_index = get_mmuidx(oi); - TCGMemOp s_bits = memop & MO_SIZE; tcg_insn_unit *label_ptr; - tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0); + tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0); tcg_out_qemu_st_direct(s, memop, data_reg, TCG_REG_X1, otype, addr_reg); - add_qemu_ldst_label(s, false, oi, s_bits == MO_64, data_reg, addr_reg, - s->code_ptr, label_ptr); + add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64, + data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ tcg_out_qemu_st_direct(s, memop, data_reg, GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR, From 4cbea5986981998cda07b13794c7e3ff7bc42e80 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 24 Aug 2015 01:42:07 +0200 Subject: [PATCH 17/18] linux-user: remove --enable-guest-base/--disable-guest-base All tcg host architectures now support the guest base and as there is no real performance loss, it can always be enabled. Anyway, guest base use can be disabled at run time by setting guest base to 0. CONFIG_USE_GUEST_BASE is defined as (USE_GUEST_BASE && USER_ONLY), so it should be replaced by CONFIG_USER_ONLY in non CONFIG_USER_ONLY parts, but as some other parts are using !CONFIG_SOFTMMU I have chosen to use !CONFIG_SOFTMMU instead.
Reviewed-by: Alexander Graf Signed-off-by: Laurent Vivier Message-Id: <1440373328-9788-2-git-send-email-laurent@vivier.eu> Signed-off-by: Richard Henderson --- bsd-user/elfload.c | 2 -- bsd-user/main.c | 12 ------------ bsd-user/qemu.h | 2 -- configure | 10 ---------- include/exec/cpu-all.h | 5 ----- linux-user/elfload.c | 2 -- linux-user/main.c | 12 ------------ linux-user/mmap.c | 4 ---- tcg/aarch64/tcg-target.c | 8 ++------ tcg/ia64/tcg-target.c | 4 +--- tcg/ppc/tcg-target.c | 6 ++---- tcg/s390/tcg-target.c | 2 +- tcg/sparc/tcg-target.c | 6 ++---- translate-all.c | 2 +- 14 files changed, 9 insertions(+), 68 deletions(-) diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c index 2bf57eb1fc..d067779273 100644 --- a/bsd-user/elfload.c +++ b/bsd-user/elfload.c @@ -1371,7 +1371,6 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, info->mmap = 0; elf_entry = (abi_ulong) elf_ex.e_entry; -#if defined(CONFIG_USE_GUEST_BASE) /* * In case where user has not explicitly set the guest_base, we * probe here that should we set it automatically. @@ -1392,7 +1391,6 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, } } } -#endif /* CONFIG_USE_GUEST_BASE */ /* Do this so that we can load the interpreter, if need be. 
We will change some of these later */ diff --git a/bsd-user/main.c b/bsd-user/main.c index ee68daa395..f0a1268dda 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -35,12 +35,10 @@ #include "qemu/envlist.h" int singlestep; -#if defined(CONFIG_USE_GUEST_BASE) unsigned long mmap_min_addr; unsigned long guest_base; int have_guest_base; unsigned long reserved_va; -#endif static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX; const char *qemu_uname_release; @@ -682,9 +680,7 @@ static void usage(void) "-drop-ld-preload drop LD_PRELOAD for target process\n" "-E var=value sets/modifies targets environment variable(s)\n" "-U var unsets targets environment variable(s)\n" -#if defined(CONFIG_USE_GUEST_BASE) "-B address set guest_base address to address\n" -#endif "-bsd type select emulated BSD type FreeBSD/NetBSD/OpenBSD (default)\n" "\n" "Debug options:\n" @@ -830,11 +826,9 @@ int main(int argc, char **argv) #endif exit(1); } -#if defined(CONFIG_USE_GUEST_BASE) } else if (!strcmp(r, "B")) { guest_base = strtol(argv[optind++], NULL, 0); have_guest_base = 1; -#endif } else if (!strcmp(r, "drop-ld-preload")) { (void) envlist_unsetenv(envlist, "LD_PRELOAD"); } else if (!strcmp(r, "bsd")) { @@ -923,7 +917,6 @@ int main(int argc, char **argv) target_environ = envlist_to_environ(envlist, NULL); envlist_free(envlist); -#if defined(CONFIG_USE_GUEST_BASE) /* * Now that page sizes are configured in cpu_init() we can do * proper page alignment for guest_base. 
@@ -950,7 +943,6 @@ int main(int argc, char **argv) fclose(fp); } } -#endif /* CONFIG_USE_GUEST_BASE */ if (loader_exec(filename, argv+optind, target_environ, regs, info) != 0) { printf("Error loading %s\n", filename); @@ -964,9 +956,7 @@ int main(int argc, char **argv) free(target_environ); if (qemu_log_enabled()) { -#if defined(CONFIG_USE_GUEST_BASE) qemu_log("guest_base 0x%lx\n", guest_base); -#endif log_page_dump(); qemu_log("start_brk 0x" TARGET_ABI_FMT_lx "\n", info->start_brk); @@ -986,12 +976,10 @@ int main(int argc, char **argv) syscall_init(); signal_init(); -#if defined(CONFIG_USE_GUEST_BASE) /* Now that we've loaded the binary, GUEST_BASE is fixed. Delay generating the prologue until now so that the prologue can take the real value of GUEST_BASE into account. */ tcg_prologue_init(&tcg_ctx); -#endif /* build Task State */ memset(ts, 0, sizeof(TaskState)); diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h index 5362297fe1..21cc6023ee 100644 --- a/bsd-user/qemu.h +++ b/bsd-user/qemu.h @@ -101,9 +101,7 @@ typedef struct TaskState { void init_task_state(TaskState *ts); extern const char *qemu_uname_release; -#if defined(CONFIG_USE_GUEST_BASE) extern unsigned long mmap_min_addr; -#endif /* ??? See if we can avoid exposing so much of the loader internals. 
*/ /* diff --git a/configure b/configure index cc6ced9c09..9d24d59b19 100755 --- a/configure +++ b/configure @@ -293,7 +293,6 @@ cocoa="no" softmmu="yes" linux_user="no" bsd_user="no" -guest_base="yes" aix="no" blobs="yes" pkgversion="" @@ -975,10 +974,6 @@ for opt do ;; --enable-bsd-user) bsd_user="yes" ;; - --enable-guest-base) guest_base="yes" - ;; - --disable-guest-base) guest_base="no" - ;; --enable-pie) pie="yes" ;; --disable-pie) pie="no" @@ -1314,7 +1309,6 @@ disabled with --disable-FEATURE, default is enabled if available: user supported user emulation targets linux-user all linux usermode emulation targets bsd-user all BSD usermode emulation targets - guest-base GUEST_BASE support for usermode emulation targets docs build documentation guest-agent build the QEMU Guest Agent guest-agent-msi build guest agent Windows MSI installation package @@ -4544,7 +4538,6 @@ fi echo "brlapi support $brlapi" echo "bluez support $bluez" echo "Documentation $docs" -echo "GUEST_BASE $guest_base" echo "PIE $pie" echo "vde support $vde" echo "netmap support $netmap" @@ -5481,9 +5474,6 @@ fi if test "$target_user_only" = "yes" -a "$bflt" = "yes"; then echo "TARGET_HAS_BFLT=y" >> $config_target_mak fi -if test "$target_user_only" = "yes" -a "$guest_base" = "yes"; then - echo "CONFIG_USE_GUEST_BASE=y" >> $config_target_mak -fi if test "$target_bsd_user" = "yes" ; then echo "CONFIG_BSD_USER=y" >> $config_target_mak fi diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index ea6a9a667c..5713929ed3 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -160,16 +160,11 @@ static inline void tswap64s(uint64_t *s) /* On some host systems the guest address space is reserved on the host. * This allows the guest address space to be offset to a convenient location. 
*/ -#if defined(CONFIG_USE_GUEST_BASE) extern unsigned long guest_base; extern int have_guest_base; extern unsigned long reserved_va; #define GUEST_BASE guest_base #define RESERVED_VA reserved_va -#else -#define GUEST_BASE 0ul -#define RESERVED_VA 0ul -#endif #define GUEST_ADDR_MAX (RESERVED_VA ? RESERVED_VA : \ (1ul << TARGET_VIRT_ADDR_SPACE_BITS) - 1) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 17883686f0..9c999ac139 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1756,7 +1756,6 @@ static void probe_guest_base(const char *image_name, * it explicitly, and set guest_base appropriately. * In case of error we will print a suitable message and exit. */ -#if defined(CONFIG_USE_GUEST_BASE) const char *errmsg; if (!have_guest_base && !reserved_va) { unsigned long host_start, real_start, host_size; @@ -1795,7 +1794,6 @@ static void probe_guest_base(const char *image_name, exit_errmsg: fprintf(stderr, "%s: %s\n", image_name, errmsg); exit(-1); -#endif } diff --git a/linux-user/main.c b/linux-user/main.c index fdee981351..2c9658e90d 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -43,7 +43,6 @@ int gdbstub_port; envlist_t *envlist; static const char *cpu_model; unsigned long mmap_min_addr; -#if defined(CONFIG_USE_GUEST_BASE) unsigned long guest_base; int have_guest_base; #if (TARGET_LONG_BITS == 32) && (HOST_LONG_BITS == 64) @@ -63,7 +62,6 @@ unsigned long reserved_va = 0xf7000000; #else unsigned long reserved_va; #endif -#endif static void usage(void); @@ -3584,7 +3582,6 @@ static void handle_arg_cpu(const char *arg) } } -#if defined(CONFIG_USE_GUEST_BASE) static void handle_arg_guest_base(const char *arg) { guest_base = strtol(arg, NULL, 0); @@ -3626,7 +3623,6 @@ static void handle_arg_reserved_va(const char *arg) exit(1); } } -#endif static void handle_arg_singlestep(const char *arg) { @@ -3673,12 +3669,10 @@ static const struct qemu_argument arg_table[] = { "argv0", "forces target process argv[0] to be 'argv0'"}, {"r", 
"QEMU_UNAME", true, handle_arg_uname, "uname", "set qemu uname release string to 'uname'"}, -#if defined(CONFIG_USE_GUEST_BASE) {"B", "QEMU_GUEST_BASE", true, handle_arg_guest_base, "address", "set guest_base address to 'address'"}, {"R", "QEMU_RESERVED_VA", true, handle_arg_reserved_va, "size", "reserve 'size' bytes for guest virtual address space"}, -#endif {"d", "QEMU_LOG", true, handle_arg_log, "item[,...]", "enable logging of specified items " "(use '-d help' for a list of items)"}, @@ -3954,7 +3948,6 @@ int main(int argc, char **argv, char **envp) target_environ = envlist_to_environ(envlist, NULL); envlist_free(envlist); -#if defined(CONFIG_USE_GUEST_BASE) /* * Now that page sizes are configured in cpu_init() we can do * proper page alignment for guest_base. @@ -3976,7 +3969,6 @@ int main(int argc, char **argv, char **envp) mmap_next_start = reserved_va; } } -#endif /* CONFIG_USE_GUEST_BASE */ /* * Read in mmap_min_addr kernel parameter. This value is used @@ -4050,9 +4042,7 @@ int main(int argc, char **argv, char **envp) free(target_environ); if (qemu_log_enabled()) { -#if defined(CONFIG_USE_GUEST_BASE) qemu_log("guest_base 0x%lx\n", guest_base); -#endif log_page_dump(); qemu_log("start_brk 0x" TARGET_ABI_FMT_lx "\n", info->start_brk); @@ -4072,12 +4062,10 @@ int main(int argc, char **argv, char **envp) syscall_init(); signal_init(); -#if defined(CONFIG_USE_GUEST_BASE) /* Now that we've loaded the binary, GUEST_BASE is fixed. Delay generating the prologue until now so that the prologue can take the real value of GUEST_BASE into account. 
*/ tcg_prologue_init(&tcg_ctx); -#endif #if defined(TARGET_I386) env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 78e1b2df43..88276e77f8 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -206,7 +206,6 @@ abi_ulong mmap_next_start = TASK_UNMAPPED_BASE; unsigned long last_brk; -#ifdef CONFIG_USE_GUEST_BASE /* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk of guest address space. */ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size) @@ -253,7 +252,6 @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size) return addr; } -#endif /* * Find and reserve a free memory area of size 'size'. The search @@ -276,11 +274,9 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size) size = HOST_PAGE_ALIGN(size); -#ifdef CONFIG_USE_GUEST_BASE if (RESERVED_VA) { return mmap_find_vma_reserved(start, size); } -#endif addr = start; wrapped = repeat = 0; diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index bc3a539836..b0ddcf2f08 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -56,11 +56,7 @@ static const int tcg_target_call_oarg_regs[1] = { #define TCG_REG_TMP TCG_REG_X30 #ifndef CONFIG_SOFTMMU -# ifdef CONFIG_USE_GUEST_BASE -# define TCG_REG_GUEST_BASE TCG_REG_X28 -# else -# define TCG_REG_GUEST_BASE TCG_REG_XZR -# endif +#define TCG_REG_GUEST_BASE TCG_REG_X28 #endif static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target) @@ -1809,7 +1805,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, CPU_TEMP_BUF_NLONGS * sizeof(long)); -#if defined(CONFIG_USE_GUEST_BASE) +#if !defined(CONFIG_SOFTMMU) if (GUEST_BASE) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE); tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 71e79cfbbc..64b5cb6d97 100644 --- 
a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -40,10 +40,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R55 -#else -#define TCG_GUEST_BASE_REG TCG_REG_R0 #endif #ifndef GUEST_BASE #define GUEST_BASE 0 diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 1672220739..36a97a711c 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -89,10 +89,8 @@ static bool have_isa_2_06; #define HAVE_ISA_2_06 have_isa_2_06 #define HAVE_ISEL have_isa_2_06 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG 30 -#else -#define TCG_GUEST_BASE_REG 0 #endif #ifndef NDEBUG @@ -1800,7 +1798,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) } tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU if (GUEST_BASE) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index be51c8b668..2091bebb44 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -51,7 +51,7 @@ /* A scratch register that may be be used throughout the backend. 
*/ #define TCG_TMP0 TCG_REG_R14 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R13 #else #define TCG_GUEST_BASE_REG TCG_REG_R0 diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index 87f9bcca4b..b573e0da19 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -83,10 +83,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #define TCG_REG_T1 TCG_REG_G1 #define TCG_REG_T2 TCG_REG_O7 -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU # define TCG_GUEST_BASE_REG TCG_REG_I5 -#else -# define TCG_GUEST_BASE_REG TCG_REG_G0 #endif static const int tcg_target_reg_alloc_order[] = { @@ -955,7 +953,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) | INSN_IMM13(-frame_size)); -#ifdef CONFIG_USE_GUEST_BASE +#ifndef CONFIG_SOFTMMU if (GUEST_BASE != 0) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); diff --git a/translate-all.c b/translate-all.c index 9c46ffa0e3..2a40530bba 100644 --- a/translate-all.c +++ b/translate-all.c @@ -688,7 +688,7 @@ void tcg_exec_init(unsigned long tb_size) tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; tcg_register_jit(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size); page_init(); -#if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE) +#if defined(CONFIG_SOFTMMU) /* There's no guest base to take into account, so go ahead and initialize the prologue now. */ tcg_prologue_init(&tcg_ctx); From b76f21a70748b735d6ac84fec4bb9bdaafa339b1 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 24 Aug 2015 14:53:54 +0200 Subject: [PATCH 18/18] linux-user: remove useless macros GUEST_BASE and RESERVED_VA As we have removed CONFIG_USE_GUEST_BASE, we always use a guest base and the macros GUEST_BASE and RESERVED_VA become useless: replace them by their values. 
Reviewed-by: Alexander Graf Signed-off-by: Laurent Vivier Message-Id: <1440420834-8388-1-git-send-email-laurent@vivier.eu> Signed-off-by: Richard Henderson --- include/exec/cpu-all.h | 4 +--- include/exec/cpu_ldst.h | 8 ++++---- linux-user/mmap.c | 20 ++++++++++---------- tcg/aarch64/tcg-target.c | 10 +++++----- tcg/arm/tcg-target.c | 8 ++++---- tcg/i386/tcg-target.c | 22 +++++++++++----------- tcg/ia64/tcg-target.c | 21 +++++++++------------ tcg/mips/tcg-target.c | 16 ++++++++-------- tcg/ppc/tcg-target.c | 12 ++++-------- tcg/s390/tcg-target.c | 13 ++++--------- tcg/sparc/tcg-target.c | 8 ++++---- 11 files changed, 64 insertions(+), 78 deletions(-) diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 5713929ed3..89db792767 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -163,10 +163,8 @@ static inline void tswap64s(uint64_t *s) extern unsigned long guest_base; extern int have_guest_base; extern unsigned long reserved_va; -#define GUEST_BASE guest_base -#define RESERVED_VA reserved_va -#define GUEST_ADDR_MAX (RESERVED_VA ? RESERVED_VA : \ +#define GUEST_ADDR_MAX (reserved_va ? reserved_va : \ (1ul << TARGET_VIRT_ADDR_SPACE_BITS) - 1) #endif diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index 1239c60f23..26f479416a 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -49,20 +49,20 @@ #if defined(CONFIG_USER_ONLY) /* All direct uses of g2h and h2g need to go away for usermode softmmu. 
*/ -#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + GUEST_BASE)) +#define g2h(x) ((void *)((unsigned long)(target_ulong)(x) + guest_base)) #if HOST_LONG_BITS <= TARGET_VIRT_ADDR_SPACE_BITS #define h2g_valid(x) 1 #else #define h2g_valid(x) ({ \ - unsigned long __guest = (unsigned long)(x) - GUEST_BASE; \ + unsigned long __guest = (unsigned long)(x) - guest_base; \ (__guest < (1ul << TARGET_VIRT_ADDR_SPACE_BITS)) && \ - (!RESERVED_VA || (__guest < RESERVED_VA)); \ + (!reserved_va || (__guest < reserved_va)); \ }) #endif #define h2g_nocheck(x) ({ \ - unsigned long __ret = (unsigned long)(x) - GUEST_BASE; \ + unsigned long __ret = (unsigned long)(x) - guest_base; \ (abi_ulong)__ret; \ }) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 88276e77f8..b2126c76fa 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -215,14 +215,14 @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size) int prot; int looped = 0; - if (size > RESERVED_VA) { + if (size > reserved_va) { return (abi_ulong)-1; } size = HOST_PAGE_ALIGN(size); end_addr = start + size; - if (end_addr > RESERVED_VA) { - end_addr = RESERVED_VA; + if (end_addr > reserved_va) { + end_addr = reserved_va; } addr = end_addr - qemu_host_page_size; @@ -231,7 +231,7 @@ static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size) if (looped) { return (abi_ulong)-1; } - end_addr = RESERVED_VA; + end_addr = reserved_va; addr = end_addr - qemu_host_page_size; looped = 1; continue; @@ -274,7 +274,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size) size = HOST_PAGE_ALIGN(size); - if (RESERVED_VA) { + if (reserved_va) { return mmap_find_vma_reserved(start, size); } @@ -667,7 +667,7 @@ int target_munmap(abi_ulong start, abi_ulong len) ret = 0; /* unmap what we can */ if (real_start < real_end) { - if (RESERVED_VA) { + if (reserved_va) { mmap_reserve(real_start, real_end - real_start); } else { ret = munmap(g2h(real_start), real_end - real_start); @@ -697,7 +697,7 @@ 
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, flags, g2h(new_addr)); - if (RESERVED_VA && host_addr != MAP_FAILED) { + if (reserved_va && host_addr != MAP_FAILED) { /* If new and old addresses overlap then the above mremap will already have failed with EINVAL. */ mmap_reserve(old_addr, old_size); @@ -715,13 +715,13 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, old_size, new_size, flags | MREMAP_FIXED, g2h(mmap_start)); - if ( RESERVED_VA ) { + if (reserved_va) { mmap_reserve(old_addr, old_size); } } } else { int prot = 0; - if (RESERVED_VA && old_size < new_size) { + if (reserved_va && old_size < new_size) { abi_ulong addr; for (addr = old_addr + old_size; addr < old_addr + new_size; @@ -731,7 +731,7 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, } if (prot == 0) { host_addr = mremap(g2h(old_addr), old_size, new_size, flags); - if (host_addr != MAP_FAILED && RESERVED_VA && old_size > new_size) { + if (host_addr != MAP_FAILED && reserved_va && old_size > new_size) { mmap_reserve(old_addr + old_size, new_size - old_size); } } else { diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index b0ddcf2f08..01ae610cd7 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -30,7 +30,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { static const int tcg_target_reg_alloc_order[] = { TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23, TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27, - TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */ + TCG_REG_X28, /* we will reserve this for guest_base if configured */ TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15, @@ -1225,7 +1225,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ tcg_out_qemu_ld_direct(s, memop, ext, data_reg, - GUEST_BASE ? 
TCG_REG_GUEST_BASE : TCG_REG_XZR, + guest_base ? TCG_REG_GUEST_BASE : TCG_REG_XZR, otype, addr_reg); #endif /* CONFIG_SOFTMMU */ } @@ -1246,7 +1246,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, data_reg, addr_reg, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ tcg_out_qemu_st_direct(s, memop, data_reg, - GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR, + guest_base ? TCG_REG_GUEST_BASE : TCG_REG_XZR, otype, addr_reg); #endif /* CONFIG_SOFTMMU */ } @@ -1806,8 +1806,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) CPU_TEMP_BUF_NLONGS * sizeof(long)); #if !defined(CONFIG_SOFTMMU) - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE); + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE); } #endif diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index ae2ec7a922..3edf6a6f97 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -1493,8 +1493,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP); } else { tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo); @@ -1623,8 +1623,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, GUEST_BASE); + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base); tcg_out_qemu_st_index(s, COND_AL, opc, datalo, datahi, addrlo, TCG_REG_TMP); } else { diff --git 
a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index ff55499bb3..d2adbc4d17 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1432,7 +1432,7 @@ int arch_prctl(int code, unsigned long addr); static int guest_base_flags; static inline void setup_guest_base_seg(void) { - if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) { + if (arch_prctl(ARCH_SET_GS, guest_base) == 0) { guest_base_flags = P_GS; } } @@ -1577,7 +1577,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else { - int32_t offset = GUEST_BASE; + int32_t offset = guest_base; TCGReg base = addrlo; int index = -1; int seg = 0; @@ -1586,7 +1586,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) We can do this with the ADDR32 prefix if we're not using a guest base, or when using segmentation. Otherwise we need to zero-extend manually. */ - if (GUEST_BASE == 0 || guest_base_flags) { + if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { @@ -1597,8 +1597,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) tcg_out_ext32u(s, TCG_REG_L0, base); base = TCG_REG_L0; } - if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + if (offset != guest_base) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); index = TCG_REG_L1; offset = 0; } @@ -1717,12 +1717,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) s->code_ptr, label_ptr); #else { - int32_t offset = GUEST_BASE; + int32_t offset = guest_base; TCGReg base = addrlo; int seg = 0; /* See comment in tcg_out_qemu_ld re zero-extension of addrlo. 
*/ - if (GUEST_BASE == 0 || guest_base_flags) { + if (guest_base == 0 || guest_base_flags) { seg = guest_base_flags; offset = 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { @@ -1731,12 +1731,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) } else if (TCG_TARGET_REG_BITS == 64) { /* ??? Note that we can't use the same SIB addressing scheme as for loads, since we require L0 free for bswap. */ - if (offset != GUEST_BASE) { + if (offset != guest_base) { if (TARGET_LONG_BITS == 32) { tcg_out_ext32u(s, TCG_REG_L0, base); base = TCG_REG_L0; } - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, guest_base); tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); base = TCG_REG_L1; offset = 0; @@ -2315,8 +2315,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_opc(s, OPC_RET, 0, 0, 0); #if !defined(CONFIG_SOFTMMU) - /* Try to set up a segment register to point to GUEST_BASE. */ - if (GUEST_BASE) { + /* Try to set up a segment register to point to guest_base. 
*/ + if (guest_base) { setup_guest_base_seg(); } #endif diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c index 64b5cb6d97..3c07017868 100644 --- a/tcg/ia64/tcg-target.c +++ b/tcg/ia64/tcg-target.c @@ -43,9 +43,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_R55 #endif -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif /* Branch registers */ enum { @@ -1763,7 +1760,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mII, INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, @@ -1827,7 +1824,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args) } } #else - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, MmI, tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, addr_reg), @@ -1887,7 +1884,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) bswap = opc & MO_BSWAP; #if TARGET_LONG_BITS == 32 - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mII, INSN_NOP_M, tcg_opc_i29(TCG_REG_P0, OPC_ZXT4_I29, @@ -1933,7 +1930,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) INSN_NOP_M, INSN_NOP_I); #else - if (GUEST_BASE != 0) { + if (guest_base != 0) { add_guest_base = tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1, TCG_REG_R2, TCG_GUEST_BASE_REG, addr_reg); addr_reg = TCG_REG_R2; @@ -1942,7 +1939,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args) } if (!bswap) { - tcg_out_bundle(s, (GUEST_BASE ? MmI : mmI), + tcg_out_bundle(s, (guest_base ? MmI : mmI), add_guest_base, tcg_opc_m4 (TCG_REG_P0, opc_st_m4[s_bits], data_reg, addr_reg), @@ -2351,14 +2348,14 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_opc_i21(TCG_REG_P0, OPC_MOV_I21, TCG_REG_B6, TCG_REG_R33, 0)); - /* ??? 
If GUEST_BASE < 0x200000, we could load the register via + /* ??? If guest_base < 0x200000, we could load the register via an ADDL in the M slot of the next bundle. */ - if (GUEST_BASE != 0) { + if (guest_base != 0) { tcg_out_bundle(s, mlx, INSN_NOP_M, - tcg_opc_l2 (GUEST_BASE), + tcg_opc_l2(guest_base), tcg_opc_x2 (TCG_REG_P0, OPC_MOVL_X2, - TCG_GUEST_BASE_REG, GUEST_BASE)); + TCG_GUEST_BASE_REG, guest_base)); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index e97980df0b..c0ce520228 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -1180,12 +1180,12 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) add_qemu_ldst_label(s, 1, oi, data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (GUEST_BASE == 0 && data_regl != addr_regl) { + if (guest_base == 0 && data_regl != addr_regl) { base = addr_regl; - } else if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + } else if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_movi(s, TCG_TYPE_PTR, base, guest_base); tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } tcg_out_qemu_ld_direct(s, data_regl, data_regh, base, opc); @@ -1314,14 +1314,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) add_qemu_ldst_label(s, 0, oi, data_regl, data_regh, addr_regl, addr_regh, s->code_ptr, label_ptr); #else - if (GUEST_BASE == 0) { + if (guest_base == 0) { base = addr_regl; } else { base = TCG_REG_A0; - if (GUEST_BASE == (int16_t)GUEST_BASE) { - tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, GUEST_BASE); + if (guest_base == (int16_t)guest_base) { + tcg_out_opc_imm(s, OPC_ADDIU, base, addr_regl, guest_base); } else { - tcg_out_movi(s, TCG_TYPE_PTR, base, GUEST_BASE); + tcg_out_movi(s, 
TCG_TYPE_PTR, base, guest_base); tcg_out_opc_reg(s, OPC_ADDU, base, base, addr_regl); } } diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 36a97a711c..92ef719e40 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -80,10 +80,6 @@ static tcg_insn_unit *tb_ret_addr; -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif - #include "elf.h" static bool have_isa_2_06; #define HAVE_ISA_2_06 have_isa_2_06 @@ -1619,7 +1615,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); addrlo = TCG_REG_TMP1; @@ -1694,7 +1690,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) rbase = TCG_REG_R3; #else /* !CONFIG_SOFTMMU */ - rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0; + rbase = guest_base ? TCG_GUEST_BASE_REG : 0; if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) { tcg_out_ext32u(s, TCG_REG_TMP1, addrlo); addrlo = TCG_REG_TMP1; @@ -1799,8 +1795,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_R0, TCG_REG_R1, FRAME_SIZE+LR_OFFSET); #ifndef CONFIG_SOFTMMU - if (GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + if (guest_base) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c index 2091bebb44..ee2e58d4da 100644 --- a/tcg/s390/tcg-target.c +++ b/tcg/s390/tcg-target.c @@ -57,11 +57,6 @@ #define TCG_GUEST_BASE_REG TCG_REG_R0 #endif -#ifndef GUEST_BASE -#define GUEST_BASE 0 -#endif - - /* All of the following instructions are prefixed with their instruction format, and are defined as 8- or 16-bit quantities, even when the two halves of the 16-bit quantity may appear 32 bits apart in 
the insn. @@ -1638,9 +1633,9 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg, tgen_ext32u(s, TCG_TMP0, *addr_reg); *addr_reg = TCG_TMP0; } - if (GUEST_BASE < 0x80000) { + if (guest_base < 0x80000) { *index_reg = TCG_REG_NONE; - *disp = GUEST_BASE; + *disp = guest_base; } else { *index_reg = TCG_GUEST_BASE_REG; *disp = 0; @@ -2349,8 +2344,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET, CPU_TEMP_BUF_NLONGS * sizeof(long)); - if (GUEST_BASE >= 0x80000) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + if (guest_base >= 0x80000) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c index b573e0da19..54df1bc424 100644 --- a/tcg/sparc/tcg-target.c +++ b/tcg/sparc/tcg-target.c @@ -954,8 +954,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) INSN_IMM13(-frame_size)); #ifndef CONFIG_SOFTMMU - if (GUEST_BASE != 0) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE); + if (guest_base != 0) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif @@ -1144,7 +1144,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, addr = TCG_REG_T1; } tcg_out_ldst_rr(s, data, addr, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_ld_opc[memop & (MO_BSWAP | MO_SSIZE)]); #endif /* CONFIG_SOFTMMU */ } @@ -1199,7 +1199,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, addr = TCG_REG_T1; } tcg_out_ldst_rr(s, data, addr, - (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0), + (guest_base ? TCG_GUEST_BASE_REG : TCG_REG_G0), qemu_st_opc[memop & (MO_BSWAP | MO_SIZE)]); #endif /* CONFIG_SOFTMMU */ }