tcg: Save insn data and use it in cpu_restore_state_from_tb
We can now restore state without retranslation. Reviewed-by: Aurelien Jarno <aurelien@aurel32.net> Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
bad729e272
commit
fca8a500d5
@ -199,6 +199,7 @@ struct TranslationBlock {
|
|||||||
#define CF_USE_ICOUNT 0x20000
|
#define CF_USE_ICOUNT 0x20000
|
||||||
|
|
||||||
void *tc_ptr; /* pointer to the translated code */
|
void *tc_ptr; /* pointer to the translated code */
|
||||||
|
uint8_t *tc_search; /* pointer to search data */
|
||||||
/* next matching tb for physical address. */
|
/* next matching tb for physical address. */
|
||||||
struct TranslationBlock *phys_hash_next;
|
struct TranslationBlock *phys_hash_next;
|
||||||
/* original tb when cflags has CF_NOCACHE */
|
/* original tb when cflags has CF_NOCACHE */
|
||||||
|
40
tcg/tcg.c
40
tcg/tcg.c
@ -2294,7 +2294,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
|
|||||||
tcg_insn_unit *gen_code_buf,
|
tcg_insn_unit *gen_code_buf,
|
||||||
long search_pc)
|
long search_pc)
|
||||||
{
|
{
|
||||||
int i, oi, oi_next;
|
int i, oi, oi_next, num_insns;
|
||||||
|
|
||||||
#ifdef DEBUG_DISAS
|
#ifdef DEBUG_DISAS
|
||||||
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
|
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
|
||||||
@ -2338,6 +2338,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
|
|||||||
|
|
||||||
tcg_out_tb_init(s);
|
tcg_out_tb_init(s);
|
||||||
|
|
||||||
|
num_insns = -1;
|
||||||
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
|
for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
|
||||||
TCGOp * const op = &s->gen_op_buf[oi];
|
TCGOp * const op = &s->gen_op_buf[oi];
|
||||||
TCGArg * const args = &s->gen_opparam_buf[op->args];
|
TCGArg * const args = &s->gen_opparam_buf[op->args];
|
||||||
@ -2361,6 +2362,10 @@ static inline int tcg_gen_code_common(TCGContext *s,
|
|||||||
tcg_reg_alloc_movi(s, args, dead_args, sync_args);
|
tcg_reg_alloc_movi(s, args, dead_args, sync_args);
|
||||||
break;
|
break;
|
||||||
case INDEX_op_insn_start:
|
case INDEX_op_insn_start:
|
||||||
|
if (num_insns >= 0) {
|
||||||
|
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
|
||||||
|
}
|
||||||
|
num_insns++;
|
||||||
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
|
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
|
||||||
target_ulong a;
|
target_ulong a;
|
||||||
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
|
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
|
||||||
@ -2368,7 +2373,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
|
|||||||
#else
|
#else
|
||||||
a = args[i];
|
a = args[i];
|
||||||
#endif
|
#endif
|
||||||
s->gen_opc_data[i] = a;
|
s->gen_insn_data[num_insns][i] = a;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case INDEX_op_discard:
|
case INDEX_op_discard:
|
||||||
@ -2400,6 +2405,8 @@ static inline int tcg_gen_code_common(TCGContext *s,
|
|||||||
check_regs(s);
|
check_regs(s);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
tcg_debug_assert(num_insns >= 0);
|
||||||
|
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
|
||||||
|
|
||||||
/* Generate TB finalization at the end of block */
|
/* Generate TB finalization at the end of block */
|
||||||
tcg_out_tb_finalize(s);
|
tcg_out_tb_finalize(s);
|
||||||
@ -2448,24 +2455,26 @@ int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
|
|||||||
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
|
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
|
||||||
{
|
{
|
||||||
TCGContext *s = &tcg_ctx;
|
TCGContext *s = &tcg_ctx;
|
||||||
int64_t tot;
|
int64_t tb_count = s->tb_count;
|
||||||
|
int64_t tb_div_count = tb_count ? tb_count : 1;
|
||||||
|
int64_t tot = s->interm_time + s->code_time;
|
||||||
|
|
||||||
tot = s->interm_time + s->code_time;
|
|
||||||
cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
|
cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
|
||||||
tot, tot / 2.4e9);
|
tot, tot / 2.4e9);
|
||||||
cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
|
cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
|
||||||
s->tb_count,
|
tb_count, s->tb_count1 - tb_count,
|
||||||
s->tb_count1 - s->tb_count,
|
(double)(s->tb_count1 - s->tb_count)
|
||||||
s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
|
/ (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
|
||||||
cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
|
cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
|
||||||
s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
|
(double)s->op_count / tb_div_count, s->op_count_max);
|
||||||
cpu_fprintf(f, "deleted ops/TB %0.2f\n",
|
cpu_fprintf(f, "deleted ops/TB %0.2f\n",
|
||||||
s->tb_count ?
|
(double)s->del_op_count / tb_div_count);
|
||||||
(double)s->del_op_count / s->tb_count : 0);
|
|
||||||
cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
|
cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
|
||||||
s->tb_count ?
|
(double)s->temp_count / tb_div_count, s->temp_count_max);
|
||||||
(double)s->temp_count / s->tb_count : 0,
|
cpu_fprintf(f, "avg host code/TB %0.1f\n",
|
||||||
s->temp_count_max);
|
(double)s->code_out_len / tb_div_count);
|
||||||
|
cpu_fprintf(f, "avg search data/TB %0.1f\n",
|
||||||
|
(double)s->search_out_len / tb_div_count);
|
||||||
|
|
||||||
cpu_fprintf(f, "cycles/op %0.1f\n",
|
cpu_fprintf(f, "cycles/op %0.1f\n",
|
||||||
s->op_count ? (double)tot / s->op_count : 0);
|
s->op_count ? (double)tot / s->op_count : 0);
|
||||||
@ -2473,8 +2482,11 @@ void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
|
|||||||
s->code_in_len ? (double)tot / s->code_in_len : 0);
|
s->code_in_len ? (double)tot / s->code_in_len : 0);
|
||||||
cpu_fprintf(f, "cycles/out byte %0.1f\n",
|
cpu_fprintf(f, "cycles/out byte %0.1f\n",
|
||||||
s->code_out_len ? (double)tot / s->code_out_len : 0);
|
s->code_out_len ? (double)tot / s->code_out_len : 0);
|
||||||
if (tot == 0)
|
cpu_fprintf(f, "cycles/search byte %0.1f\n",
|
||||||
|
s->search_out_len ? (double)tot / s->search_out_len : 0);
|
||||||
|
if (tot == 0) {
|
||||||
tot = 1;
|
tot = 1;
|
||||||
|
}
|
||||||
cpu_fprintf(f, " gen_interm time %0.1f%%\n",
|
cpu_fprintf(f, " gen_interm time %0.1f%%\n",
|
||||||
(double)s->interm_time / tot * 100.0);
|
(double)s->interm_time / tot * 100.0);
|
||||||
cpu_fprintf(f, " gen_code time %0.1f%%\n",
|
cpu_fprintf(f, " gen_code time %0.1f%%\n",
|
||||||
|
@ -532,6 +532,7 @@ struct TCGContext {
|
|||||||
int64_t del_op_count;
|
int64_t del_op_count;
|
||||||
int64_t code_in_len;
|
int64_t code_in_len;
|
||||||
int64_t code_out_len;
|
int64_t code_out_len;
|
||||||
|
int64_t search_out_len;
|
||||||
int64_t interm_time;
|
int64_t interm_time;
|
||||||
int64_t code_time;
|
int64_t code_time;
|
||||||
int64_t la_time;
|
int64_t la_time;
|
||||||
@ -581,7 +582,8 @@ struct TCGContext {
|
|||||||
uint16_t gen_opc_icount[OPC_BUF_SIZE];
|
uint16_t gen_opc_icount[OPC_BUF_SIZE];
|
||||||
uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
|
uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
|
||||||
|
|
||||||
target_ulong gen_opc_data[TARGET_INSN_START_WORDS];
|
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
|
||||||
|
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
|
||||||
};
|
};
|
||||||
|
|
||||||
extern TCGContext tcg_ctx;
|
extern TCGContext tcg_ctx;
|
||||||
|
150
translate-all.c
150
translate-all.c
@ -168,61 +168,128 @@ void cpu_gen_init(void)
|
|||||||
tcg_context_init(&tcg_ctx);
|
tcg_context_init(&tcg_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Encode VAL as a signed leb128 sequence at P.
|
||||||
|
Return P incremented past the encoded value. */
|
||||||
|
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
|
||||||
|
{
|
||||||
|
int more, byte;
|
||||||
|
|
||||||
|
do {
|
||||||
|
byte = val & 0x7f;
|
||||||
|
val >>= 7;
|
||||||
|
more = !((val == 0 && (byte & 0x40) == 0)
|
||||||
|
|| (val == -1 && (byte & 0x40) != 0));
|
||||||
|
if (more) {
|
||||||
|
byte |= 0x80;
|
||||||
|
}
|
||||||
|
*p++ = byte;
|
||||||
|
} while (more);
|
||||||
|
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Decode a signed leb128 sequence at *PP; increment *PP past the
|
||||||
|
decoded value. Return the decoded value. */
|
||||||
|
static target_long decode_sleb128(uint8_t **pp)
|
||||||
|
{
|
||||||
|
uint8_t *p = *pp;
|
||||||
|
target_long val = 0;
|
||||||
|
int byte, shift = 0;
|
||||||
|
|
||||||
|
do {
|
||||||
|
byte = *p++;
|
||||||
|
val |= (target_ulong)(byte & 0x7f) << shift;
|
||||||
|
shift += 7;
|
||||||
|
} while (byte & 0x80);
|
||||||
|
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
|
||||||
|
val |= -(target_ulong)1 << shift;
|
||||||
|
}
|
||||||
|
|
||||||
|
*pp = p;
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Encode the data collected about the instructions while compiling TB.
|
||||||
|
Place the data at BLOCK, and return the number of bytes consumed.
|
||||||
|
|
||||||
|
The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
|
||||||
|
which come from the target's insn_start data, followed by a uintptr_t
|
||||||
|
which comes from the host pc of the end of the code implementing the insn.
|
||||||
|
|
||||||
|
Each line of the table is encoded as sleb128 deltas from the previous
|
||||||
|
line. The seed for the first line is { tb->pc, 0..., tb->tc_ptr }.
|
||||||
|
That is, the first column is seeded with the guest pc, the last column
|
||||||
|
with the host pc, and the middle columns with zeros. */
|
||||||
|
|
||||||
|
static int encode_search(TranslationBlock *tb, uint8_t *block)
|
||||||
|
{
|
||||||
|
uint8_t *p = block;
|
||||||
|
int i, j, n;
|
||||||
|
|
||||||
|
tb->tc_search = block;
|
||||||
|
|
||||||
|
for (i = 0, n = tb->icount; i < n; ++i) {
|
||||||
|
target_ulong prev;
|
||||||
|
|
||||||
|
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
|
||||||
|
if (i == 0) {
|
||||||
|
prev = (j == 0 ? tb->pc : 0);
|
||||||
|
} else {
|
||||||
|
prev = tcg_ctx.gen_insn_data[i - 1][j];
|
||||||
|
}
|
||||||
|
p = encode_sleb128(p, tcg_ctx.gen_insn_data[i][j] - prev);
|
||||||
|
}
|
||||||
|
prev = (i == 0 ? 0 : tcg_ctx.gen_insn_end_off[i - 1]);
|
||||||
|
p = encode_sleb128(p, tcg_ctx.gen_insn_end_off[i] - prev);
|
||||||
|
}
|
||||||
|
|
||||||
|
return p - block;
|
||||||
|
}
|
||||||
|
|
||||||
/* The cpu state corresponding to 'searched_pc' is restored. */
|
/* The cpu state corresponding to 'searched_pc' is restored. */
|
||||||
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
|
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
|
||||||
uintptr_t searched_pc)
|
uintptr_t searched_pc)
|
||||||
{
|
{
|
||||||
|
target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc };
|
||||||
|
uintptr_t host_pc = (uintptr_t)tb->tc_ptr;
|
||||||
CPUArchState *env = cpu->env_ptr;
|
CPUArchState *env = cpu->env_ptr;
|
||||||
TCGContext *s = &tcg_ctx;
|
uint8_t *p = tb->tc_search;
|
||||||
int j;
|
int i, j, num_insns = tb->icount;
|
||||||
uintptr_t tc_ptr;
|
|
||||||
#ifdef CONFIG_PROFILER
|
#ifdef CONFIG_PROFILER
|
||||||
int64_t ti;
|
int64_t ti = profile_getclock();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_PROFILER
|
if (searched_pc < host_pc) {
|
||||||
ti = profile_getclock();
|
return -1;
|
||||||
#endif
|
}
|
||||||
tcg_func_start(s);
|
|
||||||
|
|
||||||
gen_intermediate_code_pc(env, tb);
|
/* Reconstruct the stored insn data while looking for the point at
|
||||||
|
which the end of the insn exceeds the searched_pc. */
|
||||||
|
for (i = 0; i < num_insns; ++i) {
|
||||||
|
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
|
||||||
|
data[j] += decode_sleb128(&p);
|
||||||
|
}
|
||||||
|
host_pc += decode_sleb128(&p);
|
||||||
|
if (host_pc > searched_pc) {
|
||||||
|
goto found;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
found:
|
||||||
if (tb->cflags & CF_USE_ICOUNT) {
|
if (tb->cflags & CF_USE_ICOUNT) {
|
||||||
assert(use_icount);
|
assert(use_icount);
|
||||||
/* Reset the cycle counter to the start of the block. */
|
/* Reset the cycle counter to the start of the block. */
|
||||||
cpu->icount_decr.u16.low += tb->icount;
|
cpu->icount_decr.u16.low += num_insns;
|
||||||
/* Clear the IO flag. */
|
/* Clear the IO flag. */
|
||||||
cpu->can_do_io = 0;
|
cpu->can_do_io = 0;
|
||||||
}
|
}
|
||||||
|
cpu->icount_decr.u16.low -= i;
|
||||||
/* find opc index corresponding to search_pc */
|
restore_state_to_opc(env, tb, data);
|
||||||
tc_ptr = (uintptr_t)tb->tc_ptr;
|
|
||||||
if (searched_pc < tc_ptr)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
s->tb_next_offset = tb->tb_next_offset;
|
|
||||||
#ifdef USE_DIRECT_JUMP
|
|
||||||
s->tb_jmp_offset = tb->tb_jmp_offset;
|
|
||||||
s->tb_next = NULL;
|
|
||||||
#else
|
|
||||||
s->tb_jmp_offset = NULL;
|
|
||||||
s->tb_next = tb->tb_next;
|
|
||||||
#endif
|
|
||||||
j = tcg_gen_code_search_pc(s, (tcg_insn_unit *)tc_ptr,
|
|
||||||
searched_pc - tc_ptr);
|
|
||||||
if (j < 0)
|
|
||||||
return -1;
|
|
||||||
/* now find start of instruction before */
|
|
||||||
while (s->gen_opc_instr_start[j] == 0) {
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
cpu->icount_decr.u16.low -= s->gen_opc_icount[j];
|
|
||||||
|
|
||||||
restore_state_to_opc(env, tb, s->gen_opc_data);
|
|
||||||
|
|
||||||
#ifdef CONFIG_PROFILER
|
#ifdef CONFIG_PROFILER
|
||||||
s->restore_time += profile_getclock() - ti;
|
tcg_ctx.restore_time += profile_getclock() - ti;
|
||||||
s->restore_count++;
|
tcg_ctx.restore_count++;
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -969,7 +1036,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
|
|||||||
tb_page_addr_t phys_pc, phys_page2;
|
tb_page_addr_t phys_pc, phys_page2;
|
||||||
target_ulong virt_page2;
|
target_ulong virt_page2;
|
||||||
tcg_insn_unit *gen_code_buf;
|
tcg_insn_unit *gen_code_buf;
|
||||||
int gen_code_size;
|
int gen_code_size, search_size;
|
||||||
#ifdef CONFIG_PROFILER
|
#ifdef CONFIG_PROFILER
|
||||||
int64_t ti;
|
int64_t ti;
|
||||||
#endif
|
#endif
|
||||||
@ -1025,11 +1092,13 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf);
|
gen_code_size = tcg_gen_code(&tcg_ctx, gen_code_buf);
|
||||||
|
search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
|
||||||
|
|
||||||
#ifdef CONFIG_PROFILER
|
#ifdef CONFIG_PROFILER
|
||||||
tcg_ctx.code_time += profile_getclock();
|
tcg_ctx.code_time += profile_getclock();
|
||||||
tcg_ctx.code_in_len += tb->size;
|
tcg_ctx.code_in_len += tb->size;
|
||||||
tcg_ctx.code_out_len += gen_code_size;
|
tcg_ctx.code_out_len += gen_code_size;
|
||||||
|
tcg_ctx.search_out_len += search_size;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DEBUG_DISAS
|
#ifdef DEBUG_DISAS
|
||||||
@ -1041,8 +1110,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
tcg_ctx.code_gen_ptr = (void *)(((uintptr_t)gen_code_buf +
|
tcg_ctx.code_gen_ptr = (void *)
|
||||||
gen_code_size + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
|
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
|
||||||
|
CODE_GEN_ALIGN);
|
||||||
|
|
||||||
/* check next page if needed */
|
/* check next page if needed */
|
||||||
virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
|
virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
|
||||||
|
Loading…
Reference in New Issue
Block a user