From 69cb498c56263a5ae484fd4fef920d3d3eea04c8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 May 2024 17:46:17 +0200 Subject: [PATCH 01/42] target/i386: fix pushed value of EFLAGS.RF When preparing an exception stack frame for a fault exception, the value pushed for RF is 1. Take that into account. The same should be true of interrupts for repeated string instructions, but the situation there is complicated. Signed-off-by: Paolo Bonzini --- target/i386/tcg/seg_helper.c | 49 ++++++++++++++++++++++++++++++++---- target/i386/tcg/translate.c | 8 ++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 0301459004..715db1f232 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -526,6 +526,24 @@ static inline unsigned int get_sp_mask(unsigned int e2) } } +static int exception_is_fault(int intno) +{ + switch (intno) { + /* + * #DB can be both fault- and trap-like, but it never sets RF=1 + * in the RFLAGS value pushed on the stack. + */ + case EXCP01_DB: + case EXCP03_INT3: + case EXCP04_INTO: + case EXCP08_DBLE: + case EXCP12_MCHK: + return 0; + } + /* Everything else including reserved exception is a fault. */ + return 1; +} + int exception_has_error_code(int intno) { switch (intno) { @@ -605,8 +623,9 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, int type, dpl, selector, ss_dpl, cpl; int has_error_code, new_stack, shift; uint32_t e1, e2, offset, ss = 0, esp, ss_e1 = 0, ss_e2 = 0; - uint32_t old_eip, sp_mask; + uint32_t old_eip, sp_mask, eflags; int vm86 = env->eflags & VM_MASK; + bool set_rf; has_error_code = 0; if (!is_int && !is_hw) { @@ -614,8 +633,10 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, } if (is_int) { old_eip = next_eip; + set_rf = false; } else { old_eip = env->eip; + set_rf = exception_is_fault(intno); } dt = &env->idt; @@ -748,6 +769,15 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, } push_size <<= shift; #endif + eflags = cpu_compute_eflags(env); + /* + * AMD states that code breakpoint #DBs clear RF=0, Intel leaves it + * as is. AMD behavior could be implemented in check_hw_breakpoints(). 
+ */ + if (set_rf) { + eflags |= RF_MASK; + } + if (shift == 1) { if (new_stack) { if (vm86) { @@ -759,7 +789,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, PUSHL(ssp, esp, sp_mask, env->segs[R_SS].selector); PUSHL(ssp, esp, sp_mask, env->regs[R_ESP]); } - PUSHL(ssp, esp, sp_mask, cpu_compute_eflags(env)); + PUSHL(ssp, esp, sp_mask, eflags); PUSHL(ssp, esp, sp_mask, env->segs[R_CS].selector); PUSHL(ssp, esp, sp_mask, old_eip); if (has_error_code) { @@ -776,7 +806,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, PUSHW(ssp, esp, sp_mask, env->segs[R_SS].selector); PUSHW(ssp, esp, sp_mask, env->regs[R_ESP]); } - PUSHW(ssp, esp, sp_mask, cpu_compute_eflags(env)); + PUSHW(ssp, esp, sp_mask, eflags); PUSHW(ssp, esp, sp_mask, env->segs[R_CS].selector); PUSHW(ssp, esp, sp_mask, old_eip); if (has_error_code) { @@ -868,8 +898,9 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, target_ulong ptr; int type, dpl, selector, cpl, ist; int has_error_code, new_stack; - uint32_t e1, e2, e3, ss; + uint32_t e1, e2, e3, ss, eflags; target_ulong old_eip, esp, offset; + bool set_rf; has_error_code = 0; if (!is_int && !is_hw) { @@ -877,8 +908,10 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, } if (is_int) { old_eip = next_eip; + set_rf = false; } else { old_eip = env->eip; + set_rf = exception_is_fault(intno); } dt = &env->idt; @@ -950,9 +983,15 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, } esp &= ~0xfLL; /* align stack */ + /* See do_interrupt_protected. */ + eflags = cpu_compute_eflags(env); + if (set_rf) { + eflags |= RF_MASK; + } + PUSHQ(esp, env->segs[R_SS].selector); PUSHQ(esp, env->regs[R_ESP]); - PUSHQ(esp, cpu_compute_eflags(env)); + PUSHQ(esp, eflags); PUSHQ(esp, env->segs[R_CS].selector); PUSHQ(esp, old_eip); if (has_error_code) { diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 0486ab6911..d438f8f76f 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4630,6 +4630,14 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) * If jmp_opt, we want to handle each string instruction individually. * For icount also disable repz optimization so that each iteration * is accounted separately. + * + * FIXME: this is messy; it makes REP string instructions a lot less + * efficient than they should be and it gets in the way of correct + * handling of RF (interrupts or traps arriving after any iteration + * of a repeated string instruction but the last should set RF to 1). + * Perhaps it would be more efficient if REP string instructions were + * always at the beginning of the TB, or even their own TB? That + * would even allow accounting up to 64k iterations at once for icount. */ dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT); From 73fb7b3c4983e48f3081fca00013a996abf659c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 May 2024 13:17:27 +0200 Subject: [PATCH 02/42] target/i386: fix implementation of ICEBP ICEBP generates a trap-like exception, while gen_exception() produces a fault. Resurrect gen_update_eip_next() to implement the desired semantics. 
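(A fault leaves the saved EIP pointing at the faulting instruction, while a trap-like exception reports the address of the following instruction; ICEBP therefore has to advance EIP past the instruction before raising #DB, which is what gen_update_eip_next() does.)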
Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- target/i386/helper.h | 1 + target/i386/tcg/emit.c.inc | 5 ++++- target/i386/tcg/excp_helper.c | 20 ++++++++++++++++++++ target/i386/tcg/helper-tcg.h | 12 +++++++++++- target/i386/tcg/translate.c | 13 +++++++++++++ 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/target/i386/helper.h b/target/i386/helper.h index a52a1bf0f2..8f291a5f66 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -56,6 +56,7 @@ DEF_HELPER_2(sysret, void, env, int) DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int) DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int) +DEF_HELPER_FLAGS_1(icebp, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_3(boundw, void, env, tl, int) DEF_HELPER_3(boundl, void, env, tl, int) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index e990141454..36127d9994 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1858,7 +1858,10 @@ static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - gen_exception(s, EXCP01_DB); + gen_update_cc_op(s); + gen_update_eip_next(s); + gen_helper_icebp(tcg_env); + s->base.is_jmp = DISAS_NORETURN; } static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c index 65e37ae2a0..72387aac24 100644 --- a/target/i386/tcg/excp_helper.c +++ b/target/i386/tcg/excp_helper.c @@ -140,6 +140,26 @@ G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index, raise_interrupt2(env, exception_index, 0, 0, 0, retaddr); } +G_NORETURN void helper_icebp(CPUX86State *env) +{ + CPUState *cs = env_cpu(env); + + do_end_instruction(env); + + /* + * INT1 aka ICEBP generates a trap-like #DB, but it is pretty special. + * + * "Although the ICEBP instruction dispatches through IDT vector 1, + * that event is not interceptable by means of the #DB exception + * intercept". Instead there is a separate fault-like ICEBP intercept. + */ + cs->exception_index = EXCP01_DB; + env->error_code = 0; + env->exception_is_int = 0; + env->exception_next_eip = env->eip; + cpu_loop_exit(cs); +} + G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr, MMUAccessType access_type, uintptr_t retaddr) diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index 85957943bf..eb6a4926a4 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -111,7 +111,17 @@ int exception_has_error_code(int intno); /* smm_helper.c */ void do_smm_enter(X86CPU *cpu); -/* bpt_helper.c */ +/* sysemu/bpt_helper.c */ bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); +/* + * Do the tasks usually performed by gen_eob(). Callers of this function + * should also handle TF as appropriate. 
+ */ +static inline void do_end_instruction(CPUX86State *env) +{ + /* needed if sti is just before */ + env->hflags &= ~HF_INHIBIT_IRQ_MASK; + env->eflags &= ~HF_RF_MASK; +} #endif /* I386_HELPER_TCG_H */ diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index d438f8f76f..77ed9c1db4 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -549,6 +549,19 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d) } } +static void gen_update_eip_next(DisasContext *s) +{ + assert(s->pc_save != -1); + if (tb_cflags(s->base.tb) & CF_PCREL) { + tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save); + } else if (CODE64(s)) { + tcg_gen_movi_tl(cpu_eip, s->pc); + } else { + tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base)); + } + s->pc_save = s->pc; +} + static void gen_update_eip_cur(DisasContext *s) { assert(s->pc_save != -1); From 536032566b1fc1f4b66450770dbb30b49e736b52 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 May 2024 15:12:22 +0200 Subject: [PATCH 03/42] target/i386: cleanup HLT helpers Use decode.c's support for intercepts, doing the check in TCG-generated code rather than the helper. This is cleaner because it allows removing the eip_addend argument to helper_hlt(). Signed-off-by: Paolo Bonzini --- target/i386/helper.h | 2 +- target/i386/tcg/decode-new.c.inc | 4 ++-- target/i386/tcg/emit.c.inc | 4 ++-- target/i386/tcg/sysemu/misc_helper.c | 13 ++----------- 4 files changed, 7 insertions(+), 16 deletions(-) diff --git a/target/i386/helper.h b/target/i386/helper.h index 8f291a5f66..c244dbb481 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -90,7 +90,7 @@ DEF_HELPER_2(vmsave, void, env, int) DEF_HELPER_1(stgi, void, env) DEF_HELPER_1(clgi, void, env) DEF_HELPER_FLAGS_2(flush_page, TCG_CALL_NO_RWG, void, env, tl) -DEF_HELPER_FLAGS_2(hlt, TCG_CALL_NO_WG, noreturn, env, int) +DEF_HELPER_FLAGS_1(hlt, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_FLAGS_2(monitor, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int) DEF_HELPER_1(rdmsr, void, env) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 0ff0866e8f..376d2bdabe 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -1496,7 +1496,7 @@ static const X86OpEntry opcodes_root[256] = { [0xE7] = X86_OP_ENTRYrr(OUT, 0,v, I_unsigned,b), /* AX/EAX */ [0xF1] = X86_OP_ENTRY0(INT1, svm(ICEBP)), - [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0)), + [0xF4] = X86_OP_ENTRY0(HLT, chk(cpl0) svm(HLT)), [0xF5] = X86_OP_ENTRY0(CMC), [0xF6] = X86_OP_GROUP1(group3, E,b), [0xF7] = X86_OP_GROUP1(group3, E,v), @@ -2539,7 +2539,7 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* * Checks that result in #GP or VMEXIT come second. Intercepts are - * generally checked after non-memory exceptions (i.e. before all + * generally checked after non-memory exceptions (i.e. after all * exceptions if there is no memory operand). Exceptions are * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!). 
* diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 36127d9994..2e94e8ec56 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -1638,8 +1638,8 @@ static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { #ifdef CONFIG_SYSTEM_ONLY gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_hlt(tcg_env, cur_insn_len_i32(s)); + gen_update_eip_next(s); + gen_helper_hlt(tcg_env); s->base.is_jmp = DISAS_NORETURN; #endif } diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index edb7c3d894..e41c88346c 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -516,8 +516,7 @@ void helper_flush_page(CPUX86State *env, target_ulong addr) tlb_flush_page(env_cpu(env), addr); } -static G_NORETURN -void do_hlt(CPUX86State *env) +G_NORETURN void helper_hlt(CPUX86State *env) { CPUState *cs = env_cpu(env); @@ -527,14 +526,6 @@ void do_hlt(CPUX86State *env) cpu_loop_exit(cs); } -G_NORETURN void helper_hlt(CPUX86State *env, int next_eip_addend) -{ - cpu_svm_check_intercept_param(env, SVM_EXIT_HLT, 0, GETPC()); - env->eip += next_eip_addend; - - do_hlt(env); -} - void helper_monitor(CPUX86State *env, target_ulong ptr) { if ((uint32_t)env->regs[R_ECX] != 0) { @@ -558,6 +549,6 @@ G_NORETURN void helper_mwait(CPUX86State *env, int next_eip_addend) if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) { do_pause(env); } else { - do_hlt(env); + helper_hlt(env); } } From 330e6adc1acd2a235a96b502b3dd15ba6e77c228 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 May 2024 15:12:22 +0200 Subject: [PATCH 04/42] target/i386: cleanup PAUSE helpers Use decode.c's support for intercepts, doing the check in TCG-generated code rather than the helper. This is cleaner because it allows removing the eip_addend argument to helper_pause(), even though it adds a bit of bloat for opcode 0x90's new decoding function. 
Signed-off-by: Paolo Bonzini --- target/i386/helper.h | 2 +- target/i386/tcg/decode-new.c.inc | 15 ++++++++++++++- target/i386/tcg/emit.c.inc | 20 ++++++++------------ target/i386/tcg/helper-tcg.h | 1 - target/i386/tcg/misc_helper.c | 10 +--------- target/i386/tcg/sysemu/misc_helper.c | 2 +- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/target/i386/helper.h b/target/i386/helper.h index c244dbb481..2f46cffabd 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -53,7 +53,7 @@ DEF_HELPER_1(sysenter, void, env) DEF_HELPER_2(sysexit, void, env, int) DEF_HELPER_2(syscall, void, env, int) DEF_HELPER_2(sysret, void, env, int) -DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int) +DEF_HELPER_FLAGS_1(pause, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int) DEF_HELPER_FLAGS_1(icebp, TCG_CALL_NO_WG, noreturn, env) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 376d2bdabe..c2d8da8d14 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -1359,6 +1359,19 @@ static void decode_group11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, } } +static void decode_90(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static X86OpEntry pause = X86_OP_ENTRY0(PAUSE, svm(PAUSE)); + static X86OpEntry nop = X86_OP_ENTRY0(NOP); + static X86OpEntry xchg_ax = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v); + + if (REX_B(s)) { + *entry = xchg_ax; + } else { + *entry = (s->prefix & PREFIX_REPZ) ? pause : nop; + } +} + static const X86OpEntry opcodes_root[256] = { [0x00] = X86_OP_ENTRY2(ADD, E,b, G,b, lock), [0x01] = X86_OP_ENTRY2(ADD, E,v, G,v, lock), @@ -1441,7 +1454,7 @@ static const X86OpEntry opcodes_root[256] = { [0x86] = X86_OP_ENTRY2(XCHG, E,b, G,b, xchg), [0x87] = X86_OP_ENTRY2(XCHG, E,v, G,v, xchg), - [0x90] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), + [0x90] = X86_OP_GROUP0(90), [0x91] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), [0x92] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), [0x93] = X86_OP_ENTRY2(XCHG, 0,v, LoBits,v), diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 2e94e8ec56..f90f3d3c58 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -2350,6 +2350,14 @@ static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) decode->op[1].offset, vec_len, vec_len); } +static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_update_eip_next(s); + gen_helper_pause(tcg_env); + s->base.is_jmp = DISAS_NORETURN; +} + static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); @@ -4014,18 +4022,6 @@ static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { - if (decode->b == 0x90 && !REX_B(s)) { - if (s->prefix & PREFIX_REPZ) { - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_pause(tcg_env, cur_insn_len_i32(s)); - s->base.is_jmp = DISAS_NORETURN; - } - /* No writeback. 
*/ - decode->op[0].unit = X86_OP_SKIP; - return; - } - if (s->prefix & PREFIX_LOCK) { tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1, s->mem_index, decode->op[0].ot | MO_LE); diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h index eb6a4926a4..15d6c6f8b4 100644 --- a/target/i386/tcg/helper-tcg.h +++ b/target/i386/tcg/helper-tcg.h @@ -91,7 +91,6 @@ extern const uint8_t parity_table[256]; /* misc_helper.c */ void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); -G_NORETURN void do_pause(CPUX86State *env); /* sysemu/svm_helper.c */ #ifndef CONFIG_USER_ONLY diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c index b0f0f7b893..8316d42ffc 100644 --- a/target/i386/tcg/misc_helper.c +++ b/target/i386/tcg/misc_helper.c @@ -88,7 +88,7 @@ G_NORETURN void helper_rdpmc(CPUX86State *env) raise_exception_err(env, EXCP06_ILLOP, 0); } -G_NORETURN void do_pause(CPUX86State *env) +G_NORETURN void helper_pause(CPUX86State *env) { CPUState *cs = env_cpu(env); @@ -97,14 +97,6 @@ G_NORETURN void do_pause(CPUX86State *env) cpu_loop_exit(cs); } -G_NORETURN void helper_pause(CPUX86State *env, int next_eip_addend) -{ - cpu_svm_check_intercept_param(env, SVM_EXIT_PAUSE, 0, GETPC()); - env->eip += next_eip_addend; - - do_pause(env); -} - uint64_t helper_rdpkru(CPUX86State *env, uint32_t ecx) { if ((env->cr[4] & CR4_PKE_MASK) == 0) { diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index e41c88346c..093cc2d0f9 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -547,7 +547,7 @@ G_NORETURN void helper_mwait(CPUX86State *env, int next_eip_addend) /* XXX: not complete but not completely erroneous */ if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) { - do_pause(env); + helper_pause(env); } else { helper_hlt(env); } From 57f8dbdbe94a502301f51809e8b282b02df43370 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 May 2024 13:18:56 +0200 Subject: [PATCH 05/42] target/i386: implement DR7.GD DR7.GD triggers a #DB exception on any access to debug registers. The GD bit is cleared so that the #DB handler itself can access the debug registers. Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/bpt_helper.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/target/i386/tcg/sysemu/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c index 4d96a48a3c..c1d5fce250 100644 --- a/target/i386/tcg/sysemu/bpt_helper.c +++ b/target/i386/tcg/sysemu/bpt_helper.c @@ -238,6 +238,12 @@ target_ulong helper_get_dr(CPUX86State *env, int reg) } } + if (env->dr[7] & DR7_GD) { + env->dr[7] &= ~DR7_GD; + env->dr[6] |= DR6_BD; + raise_exception_ra(env, EXCP01_DB, GETPC()); + } + return env->dr[reg]; } @@ -251,6 +257,12 @@ void helper_set_dr(CPUX86State *env, int reg, target_ulong t0) } } + if (env->dr[7] & DR7_GD) { + env->dr[7] &= ~DR7_GD; + env->dr[6] |= DR6_BD; + raise_exception_ra(env, EXCP01_DB, GETPC()); + } + if (reg < 4) { if (hw_breakpoint_enabled(env->dr[7], reg) && hw_breakpoint_type(env->dr[7], reg) != DR7_TYPE_IO_RW) { From 8aa76496dfaac0d7b0dd34793359680c90d9aea0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 May 2024 15:41:56 +0200 Subject: [PATCH 06/42] target/i386: disable/enable breakpoints on vmentry/vmexit If the required DR7 (either from the VMCB or from the host save area) disables a breakpoint that was enabled prior to vmentry or vmexit, it is left enabled and will trigger EXCP_DEBUG. This causes a spurious #DB on the next crossing of the breakpoint. 
To disable it, vmentry/vmexit must use cpu_x86_update_dr7 to load DR7. Because cpu_x86_update_dr7 takes a 32-bit argument, check reserved bits prior to calling cpu_x86_update_dr7, and do the same for DR6 as well for consistency. This scenario is tested by the "host_rflags" test in kvm-unit-tests. Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/svm_helper.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c index 5d6de2294f..922d8964f8 100644 --- a/target/i386/tcg/sysemu/svm_helper.c +++ b/target/i386/tcg/sysemu/svm_helper.c @@ -163,6 +163,8 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) uint64_t new_cr0; uint64_t new_cr3; uint64_t new_cr4; + uint64_t new_dr6; + uint64_t new_dr7; if (aflag == 2) { addr = env->regs[R_EAX]; @@ -361,20 +363,22 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->vm_vmcb + offsetof(struct vmcb, save.rsp)); env->regs[R_EAX] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rax)); - env->dr[7] = x86_ldq_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, save.dr7)); - env->dr[6] = x86_ldq_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, save.dr6)); + + new_dr7 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr7)); + new_dr6 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr6)); #ifdef TARGET_X86_64 - if (env->dr[6] & DR_RESERVED_MASK) { + if (new_dr7 & DR_RESERVED_MASK) { cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); } - if (env->dr[7] & DR_RESERVED_MASK) { + if (new_dr6 & DR_RESERVED_MASK) { cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); } #endif + cpu_x86_update_dr7(env, new_dr7); + env->dr[6] = new_dr6; + if (is_efer_invalid_state(env)) { cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); } @@ -864,8 +868,11 @@ void do_vmexit(CPUX86State *env) env->dr[6] = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.dr6)); - env->dr[7] = x86_ldq_phys(cs, - env->vm_hsave + offsetof(struct vmcb, save.dr7)); + + /* Disables all breakpoints in the host DR7 register. */ + cpu_x86_update_dr7(env, + x86_ldq_phys(cs, + env->vm_hsave + offsetof(struct vmcb, save.dr7)) & ~0xff); /* other setups */ x86_stl_phys(cs, @@ -891,8 +898,6 @@ void do_vmexit(CPUX86State *env) from the page table indicated the host's CR3. If the PDPEs contain illegal state, the processor causes a shutdown. */ - /* Disables all breakpoints in the host DR7 register. */ - /* Checks the reloaded host state for consistency. */ /* If the host's rIP reloaded by #VMEXIT is outside the limit of the From 1a150d331d9bbd882c8b93146ff7fbc6259f0961 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 25 May 2024 10:30:50 +0200 Subject: [PATCH 07/42] target/i386: fix INHIBIT_IRQ/TF/RF handling for VMRUN From vm entry to exit, VMRUN is handled as a single instruction. It uses DISAS_NORETURN in order to avoid processing TF or RF before the first instruction executes in the guest. However, the corresponding handling is missing in vmexit. Add it, and at the same time reorganize the comments with quotes from the manual about the tasks performed by a #VMEXIT. Another gen_eob() task that is missing in VMRUN is preparing the HF_INHIBIT_IRQ flag for the next instruction, in this case by loading it from the VMCB control state. 
Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/svm_helper.c | 46 +++++++++++++++++++++-------- target/i386/tcg/translate.c | 5 ++++ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c index 922d8964f8..9db8ad62a0 100644 --- a/target/i386/tcg/sysemu/svm_helper.c +++ b/target/i386/tcg/sysemu/svm_helper.c @@ -254,6 +254,13 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) control.intercept_exceptions )); + env->hflags &= ~HF_INHIBIT_IRQ_MASK; + if (x86_ldl_phys(cs, env->vm_vmcb + + offsetof(struct vmcb, control.int_state)) & + SVM_INTERRUPT_SHADOW_MASK) { + env->hflags |= HF_INHIBIT_IRQ_MASK; + } + nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_ctl)); asid = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, @@ -815,8 +822,12 @@ void do_vmexit(CPUX86State *env) env->hflags &= ~HF_GUEST_MASK; env->intercept = 0; env->intercept_exceptions = 0; + + /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */ cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; env->int_ctl = 0; + + /* Clears the TSC_OFFSET inside the processor. */ env->tsc_offset = 0; env->gdt.base = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, @@ -836,6 +847,15 @@ void do_vmexit(CPUX86State *env) cpu_x86_update_cr4(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr4))); + + /* + * Resets the current ASID register to zero (host ASID; TLB flush). + * + * If the host is in PAE mode, the processor reloads the host's PDPEs + * from the page table indicated the host's CR3. FIXME: If the PDPEs + * contain illegal state, the processor causes a shutdown (QEMU does + * not implement PDPTRs). + */ cpu_x86_update_cr3(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr3))); @@ -843,12 +863,14 @@ void do_vmexit(CPUX86State *env) set properly */ cpu_load_efer(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.efer))); + + /* Completion of the VMRUN instruction clears the host EFLAGS.RF bit. */ env->eflags = 0; cpu_load_eflags(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK | - VM_MASK)); + RF_MASK | VM_MASK)); svm_load_seg_cache(env, MMU_PHYS_IDX, env->vm_hsave + offsetof(struct vmcb, save.es), R_ES); @@ -888,19 +910,17 @@ void do_vmexit(CPUX86State *env) env->hflags2 &= ~HF2_GIF_MASK; env->hflags2 &= ~HF2_VGIF_MASK; - /* FIXME: Resets the current ASID register to zero (host ASID). */ - /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. */ - /* Clears the TSC_OFFSET inside the processor. */ + /* FIXME: Checks the reloaded host state for consistency. */ - /* If the host is in PAE mode, the processor reloads the host's PDPEs - from the page table indicated the host's CR3. If the PDPEs contain - illegal state, the processor causes a shutdown. */ - - /* Checks the reloaded host state for consistency. */ - - /* If the host's rIP reloaded by #VMEXIT is outside the limit of the - host's code segment or non-canonical (in the case of long mode), a - #GP fault is delivered inside the host. */ + /* + * EFLAGS.TF causes a #DB trap after the VMRUN completes on the host + * side (i.e., after the #VMEXIT from the guest). Since we're running + * in the main loop, call do_interrupt_all directly. 
+ */ + if ((env->eflags & TF_MASK) != 0) { + env->dr[6] |= DR6_BS; + do_interrupt_all(X86_CPU(cs), EXCP01_DB, 0, 0, env->eip, 0); + } } diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 77ed9c1db4..a9c6424c7d 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -3745,6 +3745,11 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } gen_update_cc_op(s); gen_update_eip_cur(s); + /* + * Reloads INHIBIT_IRQ mask as well as TF and RF with guest state. + * The usual gen_eob() handling is performed on vmexit after + * host state is reloaded. + */ gen_helper_vmrun(tcg_env, tcg_constant_i32(s->aflag - 1), cur_insn_len_i32(s)); tcg_gen_exit_tb(NULL, 0); From 3718523d011e898d414f09a4ed43cf13d76de0b4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 25 May 2024 10:47:31 +0200 Subject: [PATCH 08/42] target/i386: fix INHIBIT_IRQ/TF/RF handling for PAUSE PAUSE uses DISAS_NORETURN because the corresponding helper calls cpu_loop_exit(). However, while HLT clear HF_INHIBIT_IRQ_MASK to correctly handle "STI; HLT", the same is missing from PAUSE. And also gen_eob() clears HF_RF_MASK and synthesizes a #DB exception if single-step is active; none of this is done by HLT and PAUSE. Start fixing PAUSE, HLT will follow. Signed-off-by: Paolo Bonzini --- target/i386/tcg/misc_helper.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c index 8316d42ffc..ed4cda8001 100644 --- a/target/i386/tcg/misc_helper.c +++ b/target/i386/tcg/misc_helper.c @@ -92,6 +92,10 @@ G_NORETURN void helper_pause(CPUX86State *env) { CPUState *cs = env_cpu(env); + /* Do gen_eob() tasks before going back to the main loop. */ + do_end_instruction(env); + helper_rechecking_single_step(env); + /* Just let another CPU run. */ cs->exception_index = EXCP_INTERRUPT; cpu_loop_exit(cs); From 6dd7d8c6490b73dcc33dfb1fe76c081e7e2eb820 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 25 May 2024 10:47:31 +0200 Subject: [PATCH 09/42] target/i386: fix TF/RF handling for HLT HLT uses DISAS_NORETURN because the corresponding helper calls cpu_loop_exit(). However, while gen_eob() clears HF_RF_MASK and synthesizes a #DB exception if single-step is active, none of this is done by HLT. Note that the single-step trap is generated after the halt is finished. 
Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/misc_helper.c | 2 +- target/i386/tcg/sysemu/seg_helper.c | 17 ++++++++++++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 093cc2d0f9..7fa0c5a06d 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -520,7 +520,7 @@ G_NORETURN void helper_hlt(CPUX86State *env) { CPUState *cs = env_cpu(env); - env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */ + do_end_instruction(env); cs->halted = 1; cs->exception_index = EXCP_HLT; cpu_loop_exit(cs); diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c index 9ba94deb3a..05174a79e7 100644 --- a/target/i386/tcg/sysemu/seg_helper.c +++ b/target/i386/tcg/sysemu/seg_helper.c @@ -130,15 +130,26 @@ void x86_cpu_do_interrupt(CPUState *cs) bool x86_cpu_exec_halt(CPUState *cpu) { - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - X86CPU *x86_cpu = X86_CPU(cpu); + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { bql_lock(); apic_poll_irq(x86_cpu->apic_state); cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); bql_unlock(); } - return cpu_has_work(cpu); + + if (!cpu_has_work(cpu)) { + return false; + } + + /* Complete HLT instruction. */ + if (env->eflags & TF_MASK) { + env->dr[6] |= DR6_BS; + do_interrupt_all(x86_cpu, EXCP01_DB, 0, 0, env->eip, 0); + } + return true; } bool x86_need_replay_interrupt(int interrupt_request) From cdc829b37d4dff686f083c577490da1d75bc159f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 29 May 2024 13:31:39 +0200 Subject: [PATCH 10/42] target/i386: document incorrect semantics of watchpoint following MOV/POP SS Signed-off-by: Paolo Bonzini --- target/i386/tcg/sysemu/bpt_helper.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/target/i386/tcg/sysemu/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c index c1d5fce250..b29acf41c3 100644 --- a/target/i386/tcg/sysemu/bpt_helper.c +++ b/target/i386/tcg/sysemu/bpt_helper.c @@ -215,6 +215,12 @@ void breakpoint_handler(CPUState *cs) if (cs->watchpoint_hit->flags & BP_CPU) { cs->watchpoint_hit = NULL; if (check_hw_breakpoints(env, false)) { + /* + * FIXME: #DB should be delayed by one instruction if + * INHIBIT_IRQ is set (STI cannot trigger a watchpoint). + * The delayed #DB should also fuse with one generated + * by ICEBP (aka INT1). + */ raise_exception(env, EXCP01_DB); } else { cpu_loop_exit_noexc(cs); From b37c0dc85214e9d5e4a9b6f6a496ce4de3b8a4d6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 25 May 2024 11:16:14 +0200 Subject: [PATCH 11/42] target/i386: document use of DISAS_NORETURN DISAS_NORETURN suppresses the work normally done by gen_eob(), and therefore must be used in special cases only. Document them. Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index a9c6424c7d..2b6f67be40 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -4761,6 +4761,17 @@ static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) switch (dc->base.is_jmp) { case DISAS_NORETURN: + /* + * Most instructions should not use DISAS_NORETURN, as that suppresses + * the handling of hflags normally done by gen_eob(). 
We can + * get here: + * - for exception and interrupts + * - for jump optimization (which is disabled by INHIBIT_IRQ/RF/TF) + * - for VMRUN because RF/TF handling for the host is done after vmexit, + * and INHIBIT_IRQ is loaded from the VMCB + * - for HLT/PAUSE/MWAIT to exit the main loop with specific EXCP_* values; + * the helpers handle themselves the tasks normally done by gen_eob(). + */ break; case DISAS_TOO_MANY: gen_update_cc_op(dc); From f41990f552839ad016467586a9540b9455cc52fd Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Thu, 6 Jun 2024 10:53:16 +0100 Subject: [PATCH 12/42] target/i386: use local X86DecodedOp in gen_POP() This will make subsequent changes a little easier to read. Signed-off-by: Mark Cave-Ayland Message-ID: <20240606095319.229650-2-mark.cave-ayland@ilande.co.uk> Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index f90f3d3c58..ca78504b6e 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -2575,11 +2575,13 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) { + X86DecodedOp *op = &decode->op[0]; MemOp ot = gen_pop_T0(s); - if (decode->op[0].has_ea) { + + if (op->has_ea) { /* NOTE: order is important for MMU exceptions */ gen_op_st_v(s, ot, s->T0, s->A0); - decode->op[0].unit = X86_OP_SKIP; + op->unit = X86_OP_SKIP; } /* NOTE: writing back registers after update is important for pop %sp */ gen_pop_update(s, ot); From aea49fbb01a4a1191165af0b08b5e27994f22c35 Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Thu, 6 Jun 2024 10:53:17 +0100 Subject: [PATCH 13/42] target/i386: use gen_writeback() within gen_POP() Instead of directly implementing the writeback using gen_op_st_v(), use the existing gen_writeback() function. Suggested-by: Paolo Bonzini Signed-off-by: Mark Cave-Ayland Message-ID: <20240606095319.229650-3-mark.cave-ayland@ilande.co.uk> Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index ca78504b6e..6123235c00 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -2580,9 +2580,9 @@ static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) if (op->has_ea) { /* NOTE: order is important for MMU exceptions */ - gen_op_st_v(s, ot, s->T0, s->A0); - op->unit = X86_OP_SKIP; + gen_writeback(s, decode, 0, s->T0); } + /* NOTE: writing back registers after update is important for pop %sp */ gen_pop_update(s, ot); } From f1b8613da380af8931e34dfb4dd0a6cca392d43b Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Thu, 6 Jun 2024 10:53:18 +0100 Subject: [PATCH 14/42] target/i386: fix SP when taking a memory fault during POP When OS/2 Warp configures its segment descriptors, many of them are configured with the P flag clear to allow for a fault-on-demand implementation. 
In the case where the stack value is POPped into the segment registers, the SP is incremented before calling gen_helper_load_seg() to validate the segment descriptor: IN: 0xffef2c0c: 66 07 popl %es OP: ld_i32 loc9,env,$0xfffffffffffffff8 sub_i32 loc9,loc9,$0x1 brcond_i32 loc9,$0x0,lt,$L0 st16_i32 loc9,env,$0xfffffffffffffff8 st8_i32 $0x1,env,$0xfffffffffffffffc ---- 0000000000000c0c 0000000000000000 ext16u_i64 loc0,rsp add_i64 loc0,loc0,ss_base ext32u_i64 loc0,loc0 qemu_ld_a64_i64 loc0,loc0,noat+un+leul,5 add_i64 loc3,rsp,$0x4 deposit_i64 rsp,rsp,loc3,$0x0,$0x10 extrl_i64_i32 loc5,loc0 call load_seg,$0x0,$0,env,$0x0,loc5 add_i64 rip,rip,$0x2 ext16u_i64 rip,rip exit_tb $0x0 set_label $L0 exit_tb $0x7fff58000043 If helper_load_seg() generates a fault when validating the segment descriptor then as the SP has already been incremented, the topmost word of the stack is overwritten by the arguments pushed onto the stack by the CPU before taking the fault handler. As a consequence things rapidly go wrong upon return from the fault handler due to the corrupted stack. Update the logic for the existing writeback condition so that a POP into the segment registers also calls helper_load_seg() first before incrementing the SP, so that if a fault occurs the SP remains unaltered. Signed-off-by: Mark Cave-Ayland Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2198 Message-ID: <20240606095319.229650-4-mark.cave-ayland@ilande.co.uk> Fixes: cc1d28bdbe0 ("target/i386: move 00-5F opcodes to new decoder", 2024-05-07) Signed-off-by: Paolo Bonzini --- target/i386/tcg/emit.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 6123235c00..4be3d9a6fb 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -2578,7 +2578,7 @@ static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) X86DecodedOp *op = &decode->op[0]; MemOp ot = gen_pop_T0(s); - if (op->has_ea) { + if (op->has_ea || op->unit == X86_OP_SEG) { /* NOTE: order is important for MMU exceptions */ gen_writeback(s, decode, 0, s->T0); } From 3973615e7fbaeef1deeaa067577e373781ced70a Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Thu, 6 Jun 2024 10:53:19 +0100 Subject: [PATCH 15/42] target/i386: fix size of EBP writeback in gen_enter() The calculation of FrameTemp is done using the size indicated by mo_pushpop() before being written back to EBP, but the final writeback to EBP is done using the size indicated by mo_stacksize(). In the case where mo_pushpop() is MO_32 and mo_stacksize() is MO_16 then the final writeback to EBP is done using MO_16 which can leave junk in the top 16-bits of EBP after executing ENTER. Change the writeback of EBP to use the same size indicated by mo_pushpop() to ensure that the full value is written back. Signed-off-by: Mark Cave-Ayland Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2198 Message-ID: <20240606095319.229650-5-mark.cave-ayland@ilande.co.uk> Cc: qemu-stable@nongnu.org Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 2b6f67be40..fcba9c155f 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2138,7 +2138,7 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) } /* Copy the FrameTemp value to EBP. 
*/ - gen_op_mov_reg_v(s, a_ot, R_EBP, s->T1); + gen_op_mov_reg_v(s, d_ot, R_EBP, s->T1); /* Compute the final value of ESP. */ tcg_gen_subi_tl(s->T1, s->T1, esp_addend + size * level); From 75dbebddb6a0f15bdfdae66d63de4905c793ccdb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Jun 2024 11:31:12 +0200 Subject: [PATCH 16/42] machine: default -M mem-merge to off is QEMU_MADV_MERGEABLE is not available Otherwise, starting any guest on a non-Linux guests results in qemu-system-arm: Couldn't set property 'merge' on 'memory-backend-ram': Invalid argument Cc: Michal Privoznik Signed-off-by: Paolo Bonzini --- hw/core/machine.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 77a356f232..a0ee43ca5c 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -17,6 +17,7 @@ #include "hw/loader.h" #include "qapi/error.h" #include "qapi/qapi-visit-machine.h" +#include "qemu/madvise.h" #include "qom/object_interfaces.h" #include "sysemu/cpus.h" #include "sysemu/sysemu.h" @@ -1129,7 +1130,7 @@ static void machine_initfn(Object *obj) container_get(obj, "/peripheral-anon"); ms->dump_guest_core = true; - ms->mem_merge = true; + ms->mem_merge = (QEMU_MADV_MERGEABLE != QEMU_MADV_INVALID); ms->enable_graphics = true; ms->kernel_cmdline = g_strdup(""); ms->ram_size = mc->default_ram_size; From 12d7d0c2496efe6325e73b04578213b8ad0af093 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Wed, 5 Jun 2024 12:44:54 +0200 Subject: [PATCH 17/42] meson: Don't even detect posix_madvise() on Darwin On Darwin, posix_madvise() has the same return semantics as plain madvise() [1]. That's not really what our usage expects. Fortunately, madvise() is available and preferred anyways so we may stop detecting posix_madvise() on Darwin. 1: https://opensource.apple.com/source/xnu/xnu-7195.81.3/bsd/man/man2/madvise.2.auto.html Signed-off-by: Michal Privoznik Message-ID: <00f71753bdeb8c0f049fda05fb63b84bb5502fb3.1717584048.git.mprivozn@redhat.com> Signed-off-by: Paolo Bonzini --- meson.build | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/meson.build b/meson.build index d80203f1cd..ec59effca2 100644 --- a/meson.build +++ b/meson.build @@ -2556,10 +2556,16 @@ config_host_data.set('CONFIG_OPEN_BY_HANDLE', cc.links(gnu_source_prefix + ''' #else int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); } #endif''')) -config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + ''' - #include - #include - int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }''')) + +# On Darwin posix_madvise() has the same return semantics as plain madvise(), +# i.e. errno is set and -1 is returned. That's not really how POSIX defines the +# function. On the flip side, it has madvise() which is preferred anyways. +if host_os != 'darwin' + config_host_data.set('CONFIG_POSIX_MADVISE', cc.links(gnu_source_prefix + ''' + #include + #include + int main(void) { return posix_madvise(NULL, 0, POSIX_MADV_DONTNEED); }''')) +endif config_host_data.set('CONFIG_PTHREAD_SETNAME_NP_W_TID', cc.links(gnu_source_prefix + ''' #include From bfb8c79f89773fb7ef6a373efebf3ce691716cb0 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Wed, 5 Jun 2024 12:44:55 +0200 Subject: [PATCH 18/42] osdep: Make qemu_madvise() to set errno in all cases The unspoken premise of qemu_madvise() is that errno is set on error. 
And it is mostly the case except for posix_madvise() which is documented to return either zero (on success) or a positive error number. This means, we must set errno ourselves. And while at it, make the function return a negative value on error, just like other error paths do. Signed-off-by: Michal Privoznik Reviewed-by: David Hildenbrand Message-ID: Signed-off-by: Paolo Bonzini --- util/osdep.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/util/osdep.c b/util/osdep.c index e996c4744a..e42f4e8121 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -57,7 +57,12 @@ int qemu_madvise(void *addr, size_t len, int advice) #if defined(CONFIG_MADVISE) return madvise(addr, len, advice); #elif defined(CONFIG_POSIX_MADVISE) - return posix_madvise(addr, len, advice); + int rc = posix_madvise(addr, len, advice); + if (rc) { + errno = rc; + return -1; + } + return 0; #else errno = EINVAL; return -1; From 210b7b2b3cdef01a15ebabf02dd1a9d8a51743ba Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Wed, 5 Jun 2024 12:44:56 +0200 Subject: [PATCH 19/42] osdep: Make qemu_madvise() return ENOSYS on unsupported OSes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not every OS is capable of madvise() or posix_madvise() even. In that case, errno should be set to ENOSYS as it reflects the cause better. Signed-off-by: Michal Privoznik Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: David Hildenbrand Message-ID: Signed-off-by: Paolo Bonzini --- util/osdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/osdep.c b/util/osdep.c index e42f4e8121..5d23bbfbec 100644 --- a/util/osdep.c +++ b/util/osdep.c @@ -64,7 +64,7 @@ int qemu_madvise(void *addr, size_t len, int advice) } return 0; #else - errno = EINVAL; + errno = ENOSYS; return -1; #endif } From 5d9a9a617053a97a76ef332896c386d8fc895631 Mon Sep 17 00:00:00 2001 From: Michal Privoznik Date: Wed, 5 Jun 2024 12:44:58 +0200 Subject: [PATCH 20/42] backends/hostmem: Report error when memory size is unaligned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If memory-backend-{file,ram} has a size that's not aligned to underlying page size it is not only wasteful, but also may lead to hard to debug behaviour. For instance, in case memory-backend-file and hugepages, madvise() and mbind() fail. Rightfully so, page is the smallest unit they can work with. And even though an error is reported, the root cause it not very clear: qemu-system-x86_64: Couldn't set property 'dump' on 'memory-backend-file': Invalid argument After this commit: qemu-system-x86_64: backend 'memory-backend-file' memory size must be multiple of 2 MiB Signed-off-by: Michal Privoznik Reviewed-by: Philippe Mathieu-Daudé Tested-by: Mario Casquero Message-ID: Signed-off-by: Paolo Bonzini --- backends/hostmem-epc.c | 1 + backends/hostmem-file.c | 1 + backends/hostmem-memfd.c | 1 + backends/hostmem.c | 10 ++++++++++ include/sysemu/hostmem.h | 2 +- 5 files changed, 14 insertions(+), 1 deletion(-) diff --git a/backends/hostmem-epc.c b/backends/hostmem-epc.c index 735e2e1cf8..f58fcf00a1 100644 --- a/backends/hostmem-epc.c +++ b/backends/hostmem-epc.c @@ -36,6 +36,7 @@ sgx_epc_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) return false; } + backend->aligned = true; name = object_get_canonical_path(OBJECT(backend)); ram_flags = (backend->share ? 
RAM_SHARED : 0) | RAM_PROTECTED; return memory_region_init_ram_from_fd(&backend->mr, OBJECT(backend), name, diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c index 3c69db7946..7e5072e33e 100644 --- a/backends/hostmem-file.c +++ b/backends/hostmem-file.c @@ -80,6 +80,7 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) g_assert_not_reached(); } + backend->aligned = true; name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= fb->readonly ? RAM_READONLY_FD : 0; diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c index 745ead0034..6a3c89a12b 100644 --- a/backends/hostmem-memfd.c +++ b/backends/hostmem-memfd.c @@ -52,6 +52,7 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp) return false; } + backend->aligned = true; name = host_memory_backend_get_name(backend); ram_flags = backend->share ? RAM_SHARED : 0; ram_flags |= backend->reserve ? 0 : RAM_NORESERVE; diff --git a/backends/hostmem.c b/backends/hostmem.c index eb9682b4a8..1edc0ede2a 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -20,6 +20,7 @@ #include "qom/object_interfaces.h" #include "qemu/mmap-alloc.h" #include "qemu/madvise.h" +#include "qemu/cutils.h" #include "hw/qdev-core.h" #ifdef CONFIG_NUMA @@ -325,6 +326,7 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc); void *ptr; uint64_t sz; + size_t pagesize; bool async = !phase_check(PHASE_LATE_BACKENDS_CREATED); if (!bc->alloc) { @@ -336,6 +338,14 @@ host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) ptr = memory_region_get_ram_ptr(&backend->mr); sz = memory_region_size(&backend->mr); + pagesize = qemu_ram_pagesize(backend->mr.ram_block); + + if (backend->aligned && !QEMU_IS_ALIGNED(sz, pagesize)) { + g_autofree char *pagesize_str = size_to_str(pagesize); + error_setg(errp, "backend '%s' memory size must be multiple of %s", + object_get_typename(OBJECT(uc)), pagesize_str); + return; + } if (backend->merge) { qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE); diff --git a/include/sysemu/hostmem.h b/include/sysemu/hostmem.h index 04b884bf42..de47ae59e4 100644 --- a/include/sysemu/hostmem.h +++ b/include/sysemu/hostmem.h @@ -74,7 +74,7 @@ struct HostMemoryBackend { uint64_t size; bool merge, dump, use_canonical_path; bool prealloc, is_mapped, share, reserve; - bool guest_memfd; + bool guest_memfd, aligned; uint32_t prealloc_threads; ThreadContext *prealloc_context; DECLARE_BITMAP(host_nodes, MAX_NODES + 1); From a2b6a96505097c54f5db4c77f66e9c47af4dad22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Jun 2024 11:33:06 +0200 Subject: [PATCH 21/42] machine, hostmem: improve error messages for unsupported features Detect early unsupported MADV_MERGEABLE and MADV_DONTDUMP, and print a clearer error message that points to the deficiency of the host. 
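For example, explicitly requesting mem-merge=on on a host that lacks the
corresponding madvise support now fails up front with a message along these
lines (the prefix is illustrative; the text comes from the error_setg calls
added below):

  qemu-system-x86_64: Memory merging is not supported on this host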
Cc: Michal Privoznik Signed-off-by: Paolo Bonzini --- backends/hostmem.c | 16 ++++++++++++++++ hw/core/machine.c | 8 ++++++++ 2 files changed, 24 insertions(+) diff --git a/backends/hostmem.c b/backends/hostmem.c index 1edc0ede2a..6da3d7383e 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -170,6 +170,14 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); + if (QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) { + if (value) { + error_setg(errp, "Memory merging is not supported on this host"); + } + assert(!backend->merge); + return; + } + if (!host_memory_backend_mr_inited(backend)) { backend->merge = value; return; @@ -196,6 +204,14 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) { HostMemoryBackend *backend = MEMORY_BACKEND(obj); + if (QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) { + if (!value) { + error_setg(errp, "Dumping guest memory cannot be disabled on this host"); + } + assert(backend->dump); + return; + } + if (!host_memory_backend_mr_inited(backend)) { backend->dump = value; return; diff --git a/hw/core/machine.c b/hw/core/machine.c index a0ee43ca5c..c93d249244 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -428,6 +428,10 @@ static void machine_set_dump_guest_core(Object *obj, bool value, Error **errp) { MachineState *ms = MACHINE(obj); + if (!value && QEMU_MADV_DONTDUMP == QEMU_MADV_INVALID) { + error_setg(errp, "Dumping guest memory cannot be disabled on this host"); + return; + } ms->dump_guest_core = value; } @@ -442,6 +446,10 @@ static void machine_set_mem_merge(Object *obj, bool value, Error **errp) { MachineState *ms = MACHINE(obj); + if (value && QEMU_MADV_MERGEABLE == QEMU_MADV_INVALID) { + error_setg(errp, "Memory merging is not supported on this host"); + return; + } ms->mem_merge = value; } From 5becdc0ab083e8bf346270cd34cb11b568e7538c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 Jun 2024 13:06:56 +0200 Subject: [PATCH 22/42] hostmem: simplify the code for merge and dump properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No semantic change, just simpler control flow. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Paolo Bonzini --- backends/hostmem.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/backends/hostmem.c b/backends/hostmem.c index 6da3d7383e..4e5576a4ad 100644 --- a/backends/hostmem.c +++ b/backends/hostmem.c @@ -178,19 +178,16 @@ static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp) return; } - if (!host_memory_backend_mr_inited(backend)) { - backend->merge = value; - return; - } - - if (value != backend->merge) { + if (!host_memory_backend_mr_inited(backend) && + value != backend->merge) { void *ptr = memory_region_get_ram_ptr(&backend->mr); uint64_t sz = memory_region_size(&backend->mr); qemu_madvise(ptr, sz, value ? 
QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE); - backend->merge = value; } + + backend->merge = value; } static bool host_memory_backend_get_dump(Object *obj, Error **errp) @@ -212,19 +209,16 @@ static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp) return; } - if (!host_memory_backend_mr_inited(backend)) { - backend->dump = value; - return; - } - - if (value != backend->dump) { + if (host_memory_backend_mr_inited(backend) && + value != backend->dump) { void *ptr = memory_region_get_ram_ptr(&backend->mr); uint64_t sz = memory_region_size(&backend->mr); qemu_madvise(ptr, sz, value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP); - backend->dump = value; } + + backend->dump = value; } static bool host_memory_backend_get_prealloc(Object *obj, Error **errp) From 75997e182b695f2e3f0a2d649734952af5caf3ee Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 4 Jun 2024 18:17:55 +0200 Subject: [PATCH 23/42] scsi-disk: Don't silently truncate serial number Before this commit, scsi-disk accepts a string of arbitrary length for its "serial" property. However, the value visible on the guest is actually truncated to 36 characters. This limitation doesn't come from the SCSI specification, it is an arbitrary limit that was initially picked as 20 and later bumped to 36 by commit 48b62063. Similarly, device_id was introduced as a copy of the serial number, limited to 20 characters, but commit 48b62063 forgot to actually bump it. As long as we silently truncate the given string, extending the limit is actually not a harmless change, but break the guest ABI. This is the most important reason why commit 48b62063 was really wrong (and it's also why we can't change device_id to be in sync with the serial number again and use 36 characters now, it would be another guest ABI breakage). In order to avoid future breakage, don't silently truncate the serial number string any more, but just error out if it would be truncated. 
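For example, a disk configured with a 40-character serial is now rejected
when the device is realized instead of being silently truncated (the command
line and prefix are illustrative; the message text comes from the new check
below):

  qemu-system-x86_64: -device scsi-hd,drive=disk0,serial=0123456789012345678901234567890123456789: The serial number can't be longer than 36 characters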
Buglink: https://issues.redhat.com/browse/RHEL-3542 Suggested-by: Peter Krempa Signed-off-by: Kevin Wolf Message-ID: <20240604161755.63448-1-kwolf@redhat.com> Signed-off-by: Paolo Bonzini --- hw/scsi/scsi-disk.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 4bd7af9d0c..5f55ae54e4 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -58,6 +58,9 @@ #define TYPE_SCSI_DISK_BASE "scsi-disk-base" +#define MAX_SERIAL_LEN 36 +#define MAX_SERIAL_LEN_FOR_DEVID 20 + OBJECT_DECLARE_TYPE(SCSIDiskState, SCSIDiskClass, SCSI_DISK_BASE) struct SCSIDiskClass { @@ -648,8 +651,8 @@ static int scsi_disk_emulate_vpd_page(SCSIRequest *req, uint8_t *outbuf) } l = strlen(s->serial); - if (l > 36) { - l = 36; + if (l > MAX_SERIAL_LEN) { + l = MAX_SERIAL_LEN; } trace_scsi_disk_emulate_vpd_page_80(req->cmd.xfer); @@ -2501,9 +2504,20 @@ static void scsi_realize(SCSIDevice *dev, Error **errp) if (!s->vendor) { s->vendor = g_strdup("QEMU"); } + if (s->serial && strlen(s->serial) > MAX_SERIAL_LEN) { + error_setg(errp, "The serial number can't be longer than %d characters", + MAX_SERIAL_LEN); + return; + } if (!s->device_id) { if (s->serial) { - s->device_id = g_strdup_printf("%.20s", s->serial); + if (strlen(s->serial) > MAX_SERIAL_LEN_FOR_DEVID) { + error_setg(errp, "The serial number can't be longer than %d " + "characters when it is also used as the default for " + "device_id", MAX_SERIAL_LEN_FOR_DEVID); + return; + } + s->device_id = g_strdup(s->serial); } else { const char *str = blk_name(s->qdev.conf.blk); if (str && *str) { From fcce5287c009770d74f289185eadd6163a1b6a4b Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 5 Jun 2024 23:25:49 +0800 Subject: [PATCH 24/42] stubs/meson: Fix qemuutil build when --disable-system MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Compiling without system, user, tools or guest-agent fails with the following error message: ./configure --disable-system --disable-user --disable-tools \ --disable-guest-agent error message: /usr/bin/ld: libqemuutil.a.p/util_error-report.c.o: in function `error_printf': /media/liuzhao/data/qemu-cook/build/../util/error-report.c:38: undefined reference to `error_vprintf' /usr/bin/ld: libqemuutil.a.p/util_error-report.c.o: in function `vreport': /media/liuzhao/data/qemu-cook/build/../util/error-report.c:215: undefined reference to `error_vprintf' collect2: error: ld returned 1 exit status This is because tests/bench and tests/unit both need qemuutil, which requires error_vprintf stub when system is disabled. Add error_vprintf stub into stub_ss for all cases other than disabling system. Fixes: 3a15604900c4 ("stubs: include stubs only if needed") Reported-by: Daniel P. Berrangé Signed-off-by: Zhao Liu Message-ID: <20240605152549.1795762-1-zhao1.liu@intel.com> [Include error-printf.c unconditionally. - Paolo] Signed-off-by: Paolo Bonzini --- stubs/meson.build | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/stubs/meson.build b/stubs/meson.build index 3b9d42023c..f15b48d01f 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -3,6 +3,7 @@ # below, so that it is clear who needs the stubbed functionality. 
stub_ss.add(files('cpu-get-clock.c')) +stub_ss.add(files('error-printf.c')) stub_ss.add(files('fdset.c')) stub_ss.add(files('iothread-lock.c')) stub_ss.add(files('is-daemonized.c')) @@ -45,17 +46,10 @@ if have_block or have_ga stub_ss.add(files('qmp-quit.c')) endif -if have_ga - stub_ss.add(files('error-printf.c')) -endif - if have_block or have_user stub_ss.add(files('qtest.c')) stub_ss.add(files('vm-stop.c')) stub_ss.add(files('vmstate.c')) - - # more symbols provided by the monitor - stub_ss.add(files('error-printf.c')) endif if have_user From 9c267239c7a307645849139389b42641655b7ece Mon Sep 17 00:00:00 2001 From: Phil Dennis-Jordan Date: Wed, 5 Jun 2024 13:25:50 +0200 Subject: [PATCH 25/42] i386/hvf: Adds support for INVTSC cpuid bit This patch adds the INVTSC bit to the Hypervisor.framework accelerator's CPUID bit passthrough allow-list. Previously, specifying +invtsc in the CPU configuration would fail with the following warning despite the host CPU advertising the feature: qemu-system-x86_64: warning: host doesn't support requested feature: CPUID.80000007H:EDX.invtsc [bit 8] x86 macOS itself relies on a fixed rate TSC for its own Mach absolute time timestamp mechanism, so there's no reason we can't enable this bit for guests. When the feature is enabled, a migration blocker is installed. Signed-off-by: Phil Dennis-Jordan Reviewed-by: Roman Bolshakov Tested-by: Roman Bolshakov Message-ID: <20240605112556.43193-2-phil@philjordan.eu> Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 18 ++++++++++++++++++ target/i386/hvf/x86_cpuid.c | 4 ++++ 2 files changed, 22 insertions(+) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index e493452acb..e6e916225b 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -49,6 +49,8 @@ #include "qemu/osdep.h" #include "qemu/error-report.h" #include "qemu/memalign.h" +#include "qapi/error.h" +#include "migration/blocker.h" #include "sysemu/hvf.h" #include "sysemu/hvf_int.h" @@ -74,6 +76,8 @@ #include "qemu/accel.h" #include "target/i386/cpu.h" +static Error *invtsc_mig_blocker; + void vmx_update_tpr(CPUState *cpu) { /* TODO: need integrate APIC handling */ @@ -221,6 +225,8 @@ int hvf_arch_init_vcpu(CPUState *cpu) { X86CPU *x86cpu = X86_CPU(cpu); CPUX86State *env = &x86cpu->env; + Error *local_err = NULL; + int r; uint64_t reqCap; init_emu(); @@ -238,6 +244,18 @@ int hvf_arch_init_vcpu(CPUState *cpu) } } + if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && + invtsc_mig_blocker == NULL) { + error_setg(&invtsc_mig_blocker, + "State blocked by non-migratable CPU device (invtsc flag)"); + r = migrate_add_blocker(&invtsc_mig_blocker, &local_err); + if (r < 0) { + error_report_err(local_err); + return r; + } + } + + if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED, &hvf_state->hvf_caps->vmx_cap_pinbased)) { abort(); diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c index 9380b90496..e56cd8411b 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -146,6 +146,10 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_OSVW | CPUID_EXT3_XOP | CPUID_EXT3_FMA4 | CPUID_EXT3_TBM; break; + case 0x80000007: + edx &= CPUID_APM_INVTSC; + eax = ebx = ecx = 0; + break; default: return 0; } From 0e4e622e328e203610ac300a140e43d5e2929eda Mon Sep 17 00:00:00 2001 From: Phil Dennis-Jordan Date: Wed, 5 Jun 2024 13:25:51 +0200 Subject: [PATCH 26/42] i386/hvf: Fixes some compilation warnings A bunch of function definitions used empty parentheses instead 
of (void) syntax, yielding the following warning when building with clang on macOS: warning: a function declaration without a prototype is deprecated in all versions of C [-Wstrict-prototypes] In addition to fixing these function headers, it also fixes what appears to be a typo causing a variable to be unused after initialisation. warning: variable 'entry_ctls' set but not used [-Wunused-but-set-variable] Signed-off-by: Phil Dennis-Jordan Reviewed-by: Roman Bolshakov Tested-by: Roman Bolshakov Message-ID: <20240605112556.43193-3-phil@philjordan.eu> Signed-off-by: Paolo Bonzini --- target/i386/hvf/vmx.h | 3 +-- target/i386/hvf/x86_decode.c | 2 +- target/i386/hvf/x86_emu.c | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 0fffcfa46c..3954ef883d 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -95,8 +95,7 @@ static void enter_long_mode(hv_vcpuid_t vcpu, uint64_t cr0, uint64_t efer) efer |= MSR_EFER_LMA; wvmcs(vcpu, VMCS_GUEST_IA32_EFER, efer); entry_ctls = rvmcs(vcpu, VMCS_ENTRY_CTLS); - wvmcs(vcpu, VMCS_ENTRY_CTLS, rvmcs(vcpu, VMCS_ENTRY_CTLS) | - VM_ENTRY_GUEST_LMA); + wvmcs(vcpu, VMCS_ENTRY_CTLS, entry_ctls | VM_ENTRY_GUEST_LMA); uint64_t guest_tr_ar = rvmcs(vcpu, VMCS_GUEST_TR_ACCESS_RIGHTS); if ((efer & MSR_EFER_LME) && diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 3728d7705e..a4a28f113f 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -2111,7 +2111,7 @@ uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode) return decode->len; } -void init_decoder() +void init_decoder(void) { int i; diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index 3a3f0a50d0..38c782b8e3 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/hvf/x86_emu.c @@ -1409,7 +1409,7 @@ static struct cmd_handler { static struct cmd_handler _cmd_handler[X86_DECODE_CMD_LAST]; -static void init_cmd_handler() +static void init_cmd_handler(void) { int i; for (i = 0; i < ARRAY_SIZE(handlers); i++) { @@ -1481,7 +1481,7 @@ bool exec_instruction(CPUX86State *env, struct x86_decode *ins) return true; } -void init_emu() +void init_emu(void) { init_cmd_handler(); } From f21f0cbc2c37c35680317d9dccbcbaf4aedd3a25 Mon Sep 17 00:00:00 2001 From: Phil Dennis-Jordan Date: Wed, 5 Jun 2024 13:25:52 +0200 Subject: [PATCH 27/42] hvf: Consistent types for vCPU handles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit macOS Hypervisor.framework uses different types for identifying vCPUs, hv_vcpu_t or hv_vcpuid_t, depending on host architecture. They are not just differently named typedefs for the same primitive type, but reference different-width integers. Instead of using an integer type and casting where necessary, this change introduces a typedef which resolves the active architecture’s hvf typedef. It also removes a now-unnecessary cast. 
Signed-off-by: Phil Dennis-Jordan Reviewed-by: Roman Bolshakov Tested-by: Roman Bolshakov Message-ID: <20240605112556.43193-4-phil@philjordan.eu> Signed-off-by: Paolo Bonzini --- accel/hvf/hvf-accel-ops.c | 2 +- include/sysemu/hvf_int.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/accel/hvf/hvf-accel-ops.c b/accel/hvf/hvf-accel-ops.c index 6f1e27ef46..b2a37a2229 100644 --- a/accel/hvf/hvf-accel-ops.c +++ b/accel/hvf/hvf-accel-ops.c @@ -400,7 +400,7 @@ static int hvf_init_vcpu(CPUState *cpu) r = hv_vcpu_create(&cpu->accel->fd, (hv_vcpu_exit_t **)&cpu->accel->exit, NULL); #else - r = hv_vcpu_create((hv_vcpuid_t *)&cpu->accel->fd, HV_VCPU_DEFAULT); + r = hv_vcpu_create(&cpu->accel->fd, HV_VCPU_DEFAULT); #endif cpu->accel->dirty = true; assert_hvf_ok(r); diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index 4a327fd526..30e739a2b5 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -13,8 +13,10 @@ #ifdef __aarch64__ #include +typedef hv_vcpu_t hvf_vcpuid; #else #include +typedef hv_vcpuid_t hvf_vcpuid; #endif /* hvf_slot flags */ @@ -50,7 +52,7 @@ struct HVFState { extern HVFState *hvf_state; struct AccelCPUState { - uint64_t fd; + hvf_vcpuid fd; void *exit; bool vtimer_masked; sigset_t unblock_ipi_mask; From 3e2c6727cb6b6311ee129c24b561bb87495f1a25 Mon Sep 17 00:00:00 2001 From: Phil Dennis-Jordan Date: Wed, 5 Jun 2024 13:25:53 +0200 Subject: [PATCH 28/42] i386/hvf: Fixes dirty memory tracking by page granularity RX->RWX change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using x86 macOS Hypervisor.framework as accelerator, detection of dirty memory regions is implemented by marking logged memory region slots as read-only in the EPT, then setting the dirty flag when a guest write causes a fault. The area marked dirty should then be marked writable in order for subsequent writes to succeed without a VM exit. However, dirty bits are tracked on a per-page basis, whereas the fault handler was marking the whole logged memory region as writable. This change fixes the fault handler so only the protection of the single faulting page is marked as dirty. (Note: the dirty page tracking appeared to work despite this error because HVF’s hv_vcpu_run() function generated unnecessary EPT fault exits, which ended up causing the dirty marking handler to run even when the memory region had been marked RW. When using hv_vcpu_run_until(), a change planned for a subsequent commit, these spurious exits no longer occur, so dirty memory tracking malfunctions.) Additionally, the dirty page is set to permit code execution, the same as all other guest memory; changing memory protection from RX to RW not RWX appears to have been an oversight. 
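
As a worked illustration of the page-granular re-protection described above, here is a standalone sketch assuming 4 KiB pages (it does not call Hypervisor.framework; the constant stands in for TARGET_PAGE_SIZE):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 0x1000ULL

    int main(void)
    {
        uint64_t gpa = 0x12345678ULL;                 /* faulting guest address */
        uint64_t page_start = gpa & ~(PAGE_SIZE - 1); /* 0x12345000 */

        /* Only this single page would be passed to hv_vm_protect() with
         * HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC, rather than
         * the whole logged slot. */
        printf("fault at 0x%llx -> re-protect page 0x%llx..0x%llx\n",
               (unsigned long long)gpa,
               (unsigned long long)page_start,
               (unsigned long long)(page_start + PAGE_SIZE - 1));
        return 0;
    }

Re-opening only the aligned page containing the faulting address means the other pages of the logged slot keep faulting, and therefore keep getting marked dirty, which is the point of the fix.
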
Signed-off-by: Phil Dennis-Jordan Reviewed-by: Roman Bolshakov Tested-by: Roman Bolshakov Message-ID: <20240605112556.43193-5-phil@philjordan.eu> Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index e6e916225b..268c5734d5 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -135,9 +135,10 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) if (write && slot) { if (slot->flags & HVF_SLOT_LOG) { + uint64_t dirty_page_start = gpa & ~(TARGET_PAGE_SIZE - 1u); memory_region_set_dirty(slot->region, gpa - slot->start, 1); - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_WRITE); + hv_vm_protect(dirty_page_start, TARGET_PAGE_SIZE, + HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC); } } From bf9bf2306cc8ea31b2b9bf28002aacb188ec2568 Mon Sep 17 00:00:00 2001 From: Phil Dennis-Jordan Date: Wed, 5 Jun 2024 13:25:54 +0200 Subject: [PATCH 29/42] i386/hvf: In kick_vcpu use hv_vcpu_interrupt to force exit When interrupting a vCPU thread, this patch actually tells the hypervisor to stop running guest code on that vCPU. Calling hv_vcpu_interrupt actually forces a vCPU exit, analogously to hv_vcpus_exit on aarch64. Alternatively, if the vCPU thread is not running the VM, it will immediately cause an exit when it attempts to do so. Previously, hvf_kick_vcpu_thread relied upon hv_vcpu_run returning very frequently, including many spurious exits, which made it less of a problem that nothing was actively done to stop the vCPU thread running guest code. The newer, more efficient hv_vcpu_run_until exits much more rarely, so a true "kick" is needed before switching to that. Signed-off-by: Phil Dennis-Jordan Message-ID: <20240605112556.43193-6-phil@philjordan.eu> Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 268c5734d5..106ac5cbf6 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -215,6 +215,7 @@ static inline bool apic_bus_freq_is_known(CPUX86State *env) void hvf_kick_vcpu_thread(CPUState *cpu) { cpus_kick_thread(cpu); + hv_vcpu_interrupt(&cpu->accel->fd, 1); } int hvf_arch_init(void) From a59f5b2f83d4de113556e5b68cbd68c03f712679 Mon Sep 17 00:00:00 2001 From: Phil Dennis-Jordan Date: Wed, 5 Jun 2024 13:25:55 +0200 Subject: [PATCH 30/42] i386/hvf: Updates API usage to use modern vCPU run function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit macOS 10.15 introduced the more efficient hv_vcpu_run_until() function to supersede hv_vcpu_run(). According to the documentation, there is no longer any reason to use the latter on modern host OS versions, especially after 11.0 added support for an indefinite deadline. Observed behaviour of the newer function is that as documented, it exits much less frequently - and most of the original function’s exits seem to have been effectively pointless. Another reason to use the new function is that it is a prerequisite for using newer features such as in-kernel APIC support. (Not covered by this patch.) 
This change implements the upgrade by selecting one of three code paths at compile time: two static code paths for the new and old functions respectively, when building for targets where the new function is either not available, or where the built executable won’t run on older platforms lacking the new function anyway. The third code path selects dynamically based on runtime detected availability of the weakly-linked symbol. Signed-off-by: Phil Dennis-Jordan Message-ID: <20240605112556.43193-7-phil@philjordan.eu> Signed-off-by: Paolo Bonzini --- target/i386/hvf/hvf.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 106ac5cbf6..2d0eef6cd9 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -427,6 +427,27 @@ static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } +static hv_return_t hvf_vcpu_run(hv_vcpuid_t vcpu_id) +{ + /* + * hv_vcpu_run_until is available and recommended from macOS 10.15+, + * HV_DEADLINE_FOREVER from 11.0. Test for availability at runtime and fall + * back to hv_vcpu_run() only where necessary. + */ +#ifndef MAC_OS_VERSION_11_0 + return hv_vcpu_run(vcpu_id); +#elif MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER); +#else /* MAC_OS_X_VERSION_MIN_REQUIRED < MAC_OS_VERSION_11_0 */ + /* 11.0 SDK or newer, but could be < 11 at runtime */ + if (__builtin_available(macOS 11.0, *)) { + return hv_vcpu_run_until(vcpu_id, HV_DEADLINE_FOREVER); + } else { + return hv_vcpu_run(vcpu_id); + } +#endif +} + int hvf_vcpu_exec(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -455,7 +476,7 @@ int hvf_vcpu_exec(CPUState *cpu) return EXCP_HLT; } - hv_return_t r = hv_vcpu_run(cpu->accel->fd); + hv_return_t r = hvf_vcpu_run(cpu->accel->fd); assert_hvf_ok(r); /* handle VMEXIT */ From a3c67dfc140018626f21ee507a726151cdb95ce3 Mon Sep 17 00:00:00 2001 From: Phil Dennis-Jordan Date: Wed, 5 Jun 2024 13:25:56 +0200 Subject: [PATCH 31/42] hvf: Makes assert_hvf_ok report failed expression When a macOS Hypervisor.framework call fails which is checked by assert_hvf_ok(), Qemu exits printing the error value, but not the location in the code, as regular assert() macro expansions would. This change turns assert_hvf_ok() into a macro similar to other assertions, which expands to a call to the corresponding _impl() function together with information about the expression that failed the assertion and its location in the code. Additionally, stringifying the numeric hv_return_t code is factored into a helper function that can be reused for diagnostics and debugging outside of assertions. 
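
The macro-plus-_impl() pattern can be sketched in isolation as follows (illustrative names, not the QEMU ones; the real helper additionally stringifies the hv_return_t code via a separate function):

    #include <stdio.h>
    #include <stdlib.h>

    typedef int hv_return_t;      /* stand-in for the framework's return type */
    #define HV_SUCCESS 0

    static void assert_ok_impl(hv_return_t ret, const char *file,
                               unsigned int line, const char *exp)
    {
        if (ret == HV_SUCCESS) {
            return;
        }
        /* Report the failing expression, its numeric result and location. */
        fprintf(stderr, "Error: %s failed with 0x%x at %s:%u\n",
                exp, ret, file, line);
        abort();
    }

    /* Capture the expression text and call site at expansion time. */
    #define assert_ok(EX) assert_ok_impl((EX), __FILE__, __LINE__, #EX)

    static hv_return_t fake_call(void) { return 7; }  /* simulated failure */

    int main(void)
    {
        assert_ok(fake_call());   /* prints the expression text, then aborts */
        return 0;
    }

Because the macro expands #EX, __FILE__ and __LINE__ at the call site, the report names the exact call that failed and where, which is what the plain assert_hvf_ok() function could not do.
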
Signed-off-by: Phil Dennis-Jordan Message-ID: <20240605112556.43193-8-phil@philjordan.eu> Signed-off-by: Paolo Bonzini --- accel/hvf/hvf-all.c | 51 +++++++++++++++++----------------------- include/sysemu/hvf_int.h | 5 +++- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/accel/hvf/hvf-all.c b/accel/hvf/hvf-all.c index db05b81be5..c008dc2f1e 100644 --- a/accel/hvf/hvf-all.c +++ b/accel/hvf/hvf-all.c @@ -13,40 +13,33 @@ #include "sysemu/hvf.h" #include "sysemu/hvf_int.h" -void assert_hvf_ok(hv_return_t ret) +const char *hvf_return_string(hv_return_t ret) +{ + switch (ret) { + case HV_SUCCESS: return "HV_SUCCESS"; + case HV_ERROR: return "HV_ERROR"; + case HV_BUSY: return "HV_BUSY"; + case HV_BAD_ARGUMENT: return "HV_BAD_ARGUMENT"; + case HV_NO_RESOURCES: return "HV_NO_RESOURCES"; + case HV_NO_DEVICE: return "HV_NO_DEVICE"; + case HV_UNSUPPORTED: return "HV_UNSUPPORTED"; +#if defined(MAC_OS_VERSION_11_0) && \ + MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 + case HV_DENIED: return "HV_DENIED"; +#endif + default: return "[unknown hv_return value]"; + } +} + +void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, + const char *exp) { if (ret == HV_SUCCESS) { return; } - switch (ret) { - case HV_ERROR: - error_report("Error: HV_ERROR"); - break; - case HV_BUSY: - error_report("Error: HV_BUSY"); - break; - case HV_BAD_ARGUMENT: - error_report("Error: HV_BAD_ARGUMENT"); - break; - case HV_NO_RESOURCES: - error_report("Error: HV_NO_RESOURCES"); - break; - case HV_NO_DEVICE: - error_report("Error: HV_NO_DEVICE"); - break; - case HV_UNSUPPORTED: - error_report("Error: HV_UNSUPPORTED"); - break; -#if defined(MAC_OS_VERSION_11_0) && \ - MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_11_0 - case HV_DENIED: - error_report("Error: HV_DENIED"); - break; -#endif - default: - error_report("Unknown Error"); - } + error_report("Error: %s = %s (0x%x, at %s:%u)", + exp, hvf_return_string(ret), ret, file, line); abort(); } diff --git a/include/sysemu/hvf_int.h b/include/sysemu/hvf_int.h index 30e739a2b5..5b28d17ba1 100644 --- a/include/sysemu/hvf_int.h +++ b/include/sysemu/hvf_int.h @@ -60,7 +60,10 @@ struct AccelCPUState { bool dirty; }; -void assert_hvf_ok(hv_return_t ret); +void assert_hvf_ok_impl(hv_return_t ret, const char *file, unsigned int line, + const char *exp); +#define assert_hvf_ok(EX) assert_hvf_ok_impl((EX), __FILE__, __LINE__, #EX) +const char *hvf_return_string(hv_return_t ret); int hvf_arch_init(void); int hvf_arch_init_vcpu(CPUState *cpu); void hvf_arch_vcpu_destroy(CPUState *cpu); From c1acad9f72d14daf918563eb77d2b31c39fbd06a Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 8 Nov 2023 23:20:07 -0800 Subject: [PATCH 32/42] target/i386: add support for FRED in CPUID enumeration FRED, i.e., the Intel flexible return and event delivery architecture, defines simple new transitions that change privilege level (ring transitions). The new transitions defined by the FRED architecture are FRED event delivery and, for returning from events, two FRED return instructions. FRED event delivery can effect a transition from ring 3 to ring 0, but it is used also to deliver events incident to ring 0. One FRED instruction (ERETU) effects a return from ring 0 to ring 3, while the other (ERETS) returns while remaining in ring 0. Collectively, FRED event delivery and the FRED return instructions are FRED transitions. In addition to these transitions, the FRED architecture defines a new instruction (LKGS) for managing the state of the GS segment register. 
The LKGS instruction can be used by 64-bit operating systems that do not use the new FRED transitions. WRMSRNS is an instruction that behaves exactly like WRMSR, with the only difference being that it is not a serializing instruction by default. Under certain conditions, WRMSRNS may replace WRMSR to improve performance. FRED uses it to switch RSP0 in a faster manner. Search for the latest FRED spec in most search engines with this search pattern: site:intel.com FRED (flexible return and event delivery) specification The CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[17] enumerates FRED, and the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[18] enumerates LKGS, and the CPUID feature flag CPUID.(EAX=7,ECX=1):EAX[19] enumerates WRMSRNS. Add CPUID definitions for FRED/LKGS/WRMSRNS, and expose them to KVM guests. Because FRED relies on LKGS and WRMSRNS, add that to feature dependency map. Tested-by: Shan Kang Signed-off-by: Xin Li Message-ID: <20231109072012.8078-2-xin3.li@intel.com> [Fix order of dependencies, add dependencies from LM to FRED. - Paolo] Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 14 +++++++++++++- target/i386/cpu.h | 6 ++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 914bef442c..bfb5a25e59 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1114,7 +1114,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "avx-vnni", "avx512-bf16", NULL, "cmpccxadd", NULL, NULL, "fzrm", "fsrs", "fsrc", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + NULL, "fred", "lkgs", "wrmsrns", NULL, "amx-fp16", NULL, "avx-ifma", NULL, NULL, "lam", NULL, NULL, NULL, NULL, NULL, @@ -1701,6 +1701,18 @@ static FeatureDep feature_dependencies[] = { .from = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG }, .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE }, }, + { + .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM }, + .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, + }, + { + .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_LKGS }, + .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, + }, + { + .from = { FEAT_7_1_EAX, CPUID_7_1_EAX_WRMSRNS }, + .to = { FEAT_7_1_EAX, CPUID_7_1_EAX_FRED }, + }, }; typedef struct X86RegisterInfo32 { diff --git a/target/i386/cpu.h b/target/i386/cpu.h index c64ef0c1a2..ad3577056d 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -941,6 +941,12 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) /* PREFETCHIT0/1 Instructions */ #define CPUID_7_1_EDX_PREFETCHITI (1U << 14) +/* Flexible return and event delivery (FRED) */ +#define CPUID_7_1_EAX_FRED (1U << 17) +/* Load into IA32_KERNEL_GS_BASE (LKGS) */ +#define CPUID_7_1_EAX_LKGS (1U << 18) +/* Non-Serializing Write to Model Specific Register (WRMSRNS) */ +#define CPUID_7_1_EAX_WRMSRNS (1U << 19) /* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ #define CPUID_7_2_EDX_MCDT_NO (1U << 5) From f88ddc40c6d8b591a357108feec52cea13796d2d Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 8 Nov 2023 23:20:08 -0800 Subject: [PATCH 33/42] target/i386: mark CR4.FRED not reserved The CR4.FRED bit, i.e., CR4[32], is no longer a reserved bit when FRED is exposed to guests, otherwise it is still a reserved bit. 
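
A self-contained sketch of the gating idea follows (simplified: the real cr4_reserved_bits() also accounts for many other feature-dependent bits, and the feature check reads CPUID.(EAX=7,ECX=1):EAX):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define CR4_FRED_MASK (1ULL << 32)

    static uint64_t cr4_reserved_bits(bool fred_enumerated)
    {
        uint64_t reserved = 0;          /* other reserved bits omitted */
        if (!fred_enumerated) {
            reserved |= CR4_FRED_MASK;  /* CR4[32] stays reserved */
        }
        return reserved;
    }

    int main(void)
    {
        uint64_t new_cr4 = CR4_FRED_MASK;   /* guest tries to set CR4.FRED */

        bool faults_without_fred = new_cr4 & cr4_reserved_bits(false);
        bool faults_with_fred    = new_cr4 & cr4_reserved_bits(true);

        /* Writing a still-reserved bit raises #GP in the guest. */
        printf("without FRED: %s, with FRED: %s\n",
               faults_without_fred ? "#GP" : "ok",
               faults_with_fred ? "#GP" : "ok");
        return 0;
    }
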
Tested-by: Shan Kang Signed-off-by: Xin Li Reviewed-by: Zhao Liu Message-ID: <20231109072012.8078-3-xin3.li@intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index ad3577056d..9a582218f4 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -261,6 +261,18 @@ typedef enum X86Seg { #define CR4_PKS_MASK (1U << 24) #define CR4_LAM_SUP_MASK (1U << 28) +#ifdef TARGET_X86_64 +#define CR4_FRED_MASK (1ULL << 32) +#else +#define CR4_FRED_MASK 0 +#endif + +#ifdef TARGET_X86_64 +#define CR4_FRED_MASK (1ULL << 32) +#else +#define CR4_FRED_MASK 0 +#endif + #define CR4_RESERVED_MASK \ (~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \ | CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \ @@ -269,7 +281,7 @@ typedef enum X86Seg { | CR4_LA57_MASK \ | CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \ | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK \ - | CR4_LAM_SUP_MASK)) + | CR4_LAM_SUP_MASK | CR4_FRED_MASK)) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) @@ -2613,6 +2625,9 @@ static inline uint64_t cr4_reserved_bits(CPUX86State *env) if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_LAM)) { reserved_bits |= CR4_LAM_SUP_MASK; } + if (!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED)) { + reserved_bits |= CR4_FRED_MASK; + } return reserved_bits; } From 2e641870170e28df28c5d9914e76ea7cab141516 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 8 Nov 2023 23:20:10 -0800 Subject: [PATCH 34/42] vmxcap: add support for VMX FRED controls Report secondary vm-exit controls and the VMX controls used to save/load FRED MSRs. Tested-by: Shan Kang Signed-off-by: Xin Li Message-ID: <20231109072012.8078-5-xin3.li@intel.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/vmxcap | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 3fb4d5b342..44898d73c2 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -24,6 +24,7 @@ MSR_IA32_VMX_TRUE_EXIT_CTLS = 0x48F MSR_IA32_VMX_TRUE_ENTRY_CTLS = 0x490 MSR_IA32_VMX_VMFUNC = 0x491 MSR_IA32_VMX_PROCBASED_CTLS3 = 0x492 +MSR_IA32_VMX_EXIT_CTLS2 = 0x493 class msr(object): def __init__(self): @@ -219,11 +220,21 @@ controls = [ 23: 'Clear IA32_BNDCFGS', 24: 'Conceal VM exits from PT', 25: 'Clear IA32_RTIT_CTL', + 31: 'Activate secondary VM-exit controls', }, cap_msr = MSR_IA32_VMX_EXIT_CTLS, true_cap_msr = MSR_IA32_VMX_TRUE_EXIT_CTLS, ), + Allowed1Control( + name = 'secondary VM-Exit controls', + bits = { + 0: 'Save IA32 FRED MSRs', + 1: 'Load IA32 FRED MSRs', + }, + cap_msr = MSR_IA32_VMX_EXIT_CTLS2, + ), + Control( name = 'VM-Entry controls', bits = { @@ -237,6 +248,7 @@ controls = [ 16: 'Load IA32_BNDCFGS', 17: 'Conceal VM entries from PT', 18: 'Load IA32_RTIT_CTL', + 23: 'Load IA32 FRED MSRs', }, cap_msr = MSR_IA32_VMX_ENTRY_CTLS, true_cap_msr = MSR_IA32_VMX_TRUE_ENTRY_CTLS, From ef202d64c3020f3df03c39d3ad688732d81aaae8 Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 8 Nov 2023 23:20:11 -0800 Subject: [PATCH 35/42] target/i386: enumerate VMX nested-exception support Allow VMX nested-exception support to be exposed in KVM guests, thus nested KVM guests can enumerate it. 
Tested-by: Shan Kang Signed-off-by: Xin Li Message-ID: <20231109072012.8078-6-xin3.li@intel.com> Signed-off-by: Paolo Bonzini --- scripts/kvm/vmxcap | 1 + target/i386/cpu.c | 1 + target/i386/cpu.h | 1 + 3 files changed, 3 insertions(+) diff --git a/scripts/kvm/vmxcap b/scripts/kvm/vmxcap index 44898d73c2..508be19c75 100755 --- a/scripts/kvm/vmxcap +++ b/scripts/kvm/vmxcap @@ -117,6 +117,7 @@ controls = [ 54: 'INS/OUTS instruction information', 55: 'IA32_VMX_TRUE_*_CTLS support', 56: 'Skip checks on event error code', + 58: 'VMX nested exception support', }, msr = MSR_IA32_VMX_BASIC, ), diff --git a/target/i386/cpu.c b/target/i386/cpu.c index bfb5a25e59..383230fa47 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1492,6 +1492,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [54] = "vmx-ins-outs", [55] = "vmx-true-ctls", [56] = "vmx-any-errcode", + [58] = "vmx-nested-exception", }, .msr = { .index = MSR_IA32_VMX_BASIC, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 9a582218f4..8ff27e933d 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1071,6 +1071,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define MSR_VMX_BASIC_INS_OUTS (1ULL << 54) #define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55) #define MSR_VMX_BASIC_ANY_ERRCODE (1ULL << 56) +#define MSR_VMX_BASIC_NESTED_EXCEPTION (1ULL << 58) #define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full #define MSR_VMX_MISC_STORE_LMA (1ULL << 5) From 4ebd98eb3ade5957a842da1420bda012eeeaab9c Mon Sep 17 00:00:00 2001 From: Xin Li Date: Wed, 8 Nov 2023 23:20:12 -0800 Subject: [PATCH 36/42] target/i386: Add get/set/migrate support for FRED MSRs FRED CPU states are managed in 9 new FRED MSRs, in addtion to a few existing CPU registers and MSRs, e.g., CR4.FRED and MSR_IA32_PL0_SSP. Save/restore/migrate FRED MSRs if FRED is exposed to the guest. 
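
The save/restore gating can be pictured with a small standalone sketch (this is not QEMU's vmstate API; the feature bit value and field names are stand-ins) in which FRED state is emitted only when the guest can see the feature:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define FEAT_FRED (1u << 17)   /* stand-in for CPUID_7_1_EAX_FRED */

    struct vcpu_state {
        uint32_t features;
        uint64_t fred_rsp0, fred_config;   /* two of the nine FRED MSRs */
    };

    static bool fred_needed(const struct vcpu_state *s)
    {
        return s->features & FEAT_FRED;
    }

    static void save_state(const struct vcpu_state *s)
    {
        printf("base state saved\n");
        if (fred_needed(s)) {              /* subsection only if exposed */
            printf("fred subsection: rsp0=%#llx config=%#llx\n",
                   (unsigned long long)s->fred_rsp0,
                   (unsigned long long)s->fred_config);
        }
    }

    int main(void)
    {
        struct vcpu_state s = { .features = FEAT_FRED };
        save_state(&s);
        return 0;
    }

This mirrors the .needed callback on the vmstate subsection added below: the migration stream stays unchanged for guests that do not have FRED exposed.
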
Tested-by: Shan Kang Signed-off-by: Xin Li Message-ID: <20231109072012.8078-7-xin3.li@intel.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 22 +++++++++++++++++++ target/i386/kvm/kvm.c | 49 +++++++++++++++++++++++++++++++++++++++++++ target/i386/machine.c | 28 +++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8ff27e933d..29d799adfd 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -538,6 +538,17 @@ typedef enum X86Seg { #define MSR_IA32_XFD 0x000001c4 #define MSR_IA32_XFD_ERR 0x000001c5 +/* FRED MSRs */ +#define MSR_IA32_FRED_RSP0 0x000001cc /* Stack level 0 regular stack pointer */ +#define MSR_IA32_FRED_RSP1 0x000001cd /* Stack level 1 regular stack pointer */ +#define MSR_IA32_FRED_RSP2 0x000001ce /* Stack level 2 regular stack pointer */ +#define MSR_IA32_FRED_RSP3 0x000001cf /* Stack level 3 regular stack pointer */ +#define MSR_IA32_FRED_STKLVLS 0x000001d0 /* FRED exception stack levels */ +#define MSR_IA32_FRED_SSP1 0x000001d1 /* Stack level 1 shadow stack pointer in ring 0 */ +#define MSR_IA32_FRED_SSP2 0x000001d2 /* Stack level 2 shadow stack pointer in ring 0 */ +#define MSR_IA32_FRED_SSP3 0x000001d3 /* Stack level 3 shadow stack pointer in ring 0 */ +#define MSR_IA32_FRED_CONFIG 0x000001d4 /* FRED Entrypoint and interrupt stack level */ + #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_XSS 0x00000da0 #define MSR_IA32_UMWAIT_CONTROL 0xe1 @@ -1723,6 +1734,17 @@ typedef struct CPUArchState { target_ulong cstar; target_ulong fmask; target_ulong kernelgsbase; + + /* FRED MSRs */ + uint64_t fred_rsp0; + uint64_t fred_rsp1; + uint64_t fred_rsp2; + uint64_t fred_rsp3; + uint64_t fred_stklvls; + uint64_t fred_ssp1; + uint64_t fred_ssp2; + uint64_t fred_ssp3; + uint64_t fred_config; #endif uint64_t tsc_adjust; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 0852ed077f..b563520981 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -3376,6 +3376,17 @@ static int kvm_put_msrs(X86CPU *cpu, int level) kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase); kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask); kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar); + if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) { + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, env->fred_rsp0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, env->fred_rsp1); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, env->fred_rsp2); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, env->fred_rsp3); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, env->fred_stklvls); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, env->fred_ssp1); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, env->fred_ssp2); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, env->fred_ssp3); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, env->fred_config); + } } #endif @@ -3848,6 +3859,17 @@ static int kvm_get_msrs(X86CPU *cpu) kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0); kvm_msr_entry_add(cpu, MSR_FMASK, 0); kvm_msr_entry_add(cpu, MSR_LSTAR, 0); + if (env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED) { + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP0, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP1, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP2, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_RSP3, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_STKLVLS, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP1, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP2, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_SSP3, 0); + kvm_msr_entry_add(cpu, MSR_IA32_FRED_CONFIG, 0); + } } #endif 
kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0); @@ -4069,6 +4091,33 @@ static int kvm_get_msrs(X86CPU *cpu) case MSR_LSTAR: env->lstar = msrs[i].data; break; + case MSR_IA32_FRED_RSP0: + env->fred_rsp0 = msrs[i].data; + break; + case MSR_IA32_FRED_RSP1: + env->fred_rsp1 = msrs[i].data; + break; + case MSR_IA32_FRED_RSP2: + env->fred_rsp2 = msrs[i].data; + break; + case MSR_IA32_FRED_RSP3: + env->fred_rsp3 = msrs[i].data; + break; + case MSR_IA32_FRED_STKLVLS: + env->fred_stklvls = msrs[i].data; + break; + case MSR_IA32_FRED_SSP1: + env->fred_ssp1 = msrs[i].data; + break; + case MSR_IA32_FRED_SSP2: + env->fred_ssp2 = msrs[i].data; + break; + case MSR_IA32_FRED_SSP3: + env->fred_ssp3 = msrs[i].data; + break; + case MSR_IA32_FRED_CONFIG: + env->fred_config = msrs[i].data; + break; #endif case MSR_IA32_TSC: env->tsc = msrs[i].data; diff --git a/target/i386/machine.c b/target/i386/machine.c index c3ae320814..39f8294f27 100644 --- a/target/i386/machine.c +++ b/target/i386/machine.c @@ -1544,6 +1544,33 @@ static const VMStateDescription vmstate_msr_xfd = { }; #ifdef TARGET_X86_64 +static bool intel_fred_msrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_7_1_EAX] & CPUID_7_1_EAX_FRED); +} + +static const VMStateDescription vmstate_msr_fred = { + .name = "cpu/fred", + .version_id = 1, + .minimum_version_id = 1, + .needed = intel_fred_msrs_needed, + .fields = (VMStateField[]) { + VMSTATE_UINT64(env.fred_rsp0, X86CPU), + VMSTATE_UINT64(env.fred_rsp1, X86CPU), + VMSTATE_UINT64(env.fred_rsp2, X86CPU), + VMSTATE_UINT64(env.fred_rsp3, X86CPU), + VMSTATE_UINT64(env.fred_stklvls, X86CPU), + VMSTATE_UINT64(env.fred_ssp1, X86CPU), + VMSTATE_UINT64(env.fred_ssp2, X86CPU), + VMSTATE_UINT64(env.fred_ssp3, X86CPU), + VMSTATE_UINT64(env.fred_config, X86CPU), + VMSTATE_END_OF_LIST() + } + }; + static bool amx_xtile_needed(void *opaque) { X86CPU *cpu = opaque; @@ -1747,6 +1774,7 @@ const VMStateDescription vmstate_x86_cpu = { &vmstate_pdptrs, &vmstate_msr_xfd, #ifdef TARGET_X86_64 + &vmstate_msr_fred, &vmstate_amx_xtile, #endif &vmstate_arch_lbr, From 888788dd7620f8e3fa14108eab2d708c16460266 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Thu, 6 Jun 2024 16:54:36 +0800 Subject: [PATCH 37/42] docs: i386: pc: Avoid mentioning limit of maximum vCPUs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Different versions of PC machine support different maximum vCPUs, and even different features have limits on the maximum number of vCPUs ( For example, if x2apic is not enabled in the TCG case, the maximum of 255 vCPUs are supported). It is difficult to list the maximum vCPUs under all restrictions. Thus, to avoid confusion, avoid mentioning specific maximum vCPU number limitations here. Suggested-by: Daniel P. Berrangé Signed-off-by: Zhao Liu Reviewed-by: Daniel P. Berrangé Message-ID: <20240606085436.2028900-1-zhao1.liu@intel.com> Signed-off-by: Paolo Bonzini --- docs/system/target-i386-desc.rst.inc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/system/target-i386-desc.rst.inc b/docs/system/target-i386-desc.rst.inc index 319e540573..ae312b1c1e 100644 --- a/docs/system/target-i386-desc.rst.inc +++ b/docs/system/target-i386-desc.rst.inc @@ -36,7 +36,8 @@ The QEMU PC System emulator simulates the following peripherals: - PCI UHCI, OHCI, EHCI or XHCI USB controller and a virtual USB-1.1 hub. -SMP is supported with up to 255 CPUs (and 4096 CPUs for PC Q35 machine). 
+SMP is supported with a large number of virtual CPUs (upper limit is +configuration dependent). QEMU uses the PC BIOS from the Seabios project and the Plex86/Bochs LGPL VGA BIOS. From 4b77512b2782a6b48691d4341991491de26415de Mon Sep 17 00:00:00 2001 From: John Allen Date: Mon, 3 Jun 2024 19:36:20 +0000 Subject: [PATCH 38/42] i386: Fix MCE support for AMD hosts For the most part, AMD hosts can use the same MCE injection code as Intel, but there are instances where the qemu implementation is Intel specific. First, MCE delivery works differently on AMD and does not support broadcast. Second, kvm_mce_inject generates MCEs that include a number of Intel specific status bits. Modify kvm_mce_inject to properly generate MCEs on AMD platforms. Reported-by: William Roche Signed-off-by: John Allen Message-ID: <20240603193622.47156-2-john.allen@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.h | 2 ++ target/i386/helper.c | 4 ++++ target/i386/kvm/kvm.c | 39 +++++++++++++++++++++++++++++++-------- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 29d799adfd..e6d5d1b483 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -377,6 +377,8 @@ typedef enum X86Seg { #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ +#define MCI_STATUS_DEFERRED (1ULL<<44) /* Deferred error */ +#define MCI_STATUS_POISON (1ULL<<43) /* Poisoned data consumed */ /* MISC register defines */ #define MCM_ADDR_SEGOFF 0 /* segment offset */ diff --git a/target/i386/helper.c b/target/i386/helper.c index f9d1381f90..01a268a30b 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -91,6 +91,10 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env) int family = 0; int model = 0; + if (IS_AMD_CPU(env)) { + return 0; + } + cpu_x86_version(env, &family, &model); if ((family == 6 && model >= 14) || family > 6) { return 1; diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index b563520981..55a9e8a70c 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -638,17 +638,40 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) { CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; - uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | - MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; - uint64_t mcg_status = MCG_STATUS_MCIP; + uint64_t status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_MISCV | + MCI_STATUS_ADDRV; + uint64_t mcg_status = MCG_STATUS_MCIP | MCG_STATUS_RIPV; int flags = 0; - if (code == BUS_MCEERR_AR) { - status |= MCI_STATUS_AR | 0x134; - mcg_status |= MCG_STATUS_RIPV | MCG_STATUS_EIPV; + if (!IS_AMD_CPU(env)) { + status |= MCI_STATUS_S | MCI_STATUS_UC; + if (code == BUS_MCEERR_AR) { + status |= MCI_STATUS_AR | 0x134; + mcg_status |= MCG_STATUS_EIPV; + } else { + status |= 0xc0; + } } else { - status |= 0xc0; - mcg_status |= MCG_STATUS_RIPV; + if (code == BUS_MCEERR_AR) { + status |= MCI_STATUS_UC | MCI_STATUS_POISON; + mcg_status |= MCG_STATUS_EIPV; + } else { + /* Setting the POISON bit for deferred errors indicates to the + * guest kernel that the address provided by the MCE is valid + * and usable which will ensure that the guest kernel will send + * a SIGBUS_AO signal to the guest process. 
This allows for + * more desirable behavior in the case that the guest process + * with poisoned memory has set the MCE_KILL_EARLY prctl flag + * which indicates that the process would prefer to handle or + * shutdown due to the poisoned memory condition before the + * memory has been accessed. + * + * While the POISON bit would not be set in a deferred error + * sent from hardware, the bit is not meaningful for deferred + * errors and can be reused in this scenario. + */ + status |= MCI_STATUS_DEFERRED | MCI_STATUS_POISON; + } } flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0; From 2ba8b7ee63589d4063c3b8dff3b70dbf9e224fc6 Mon Sep 17 00:00:00 2001 From: John Allen Date: Mon, 3 Jun 2024 19:36:21 +0000 Subject: [PATCH 39/42] i386: Add support for SUCCOR feature Add cpuid bit definition for the SUCCOR feature. This cpuid bit is required to be exposed to guests to allow them to handle machine check exceptions on AMD hosts. ---- v2: - Add "succor" feature word. - Add case to kvm_arch_get_supported_cpuid for the SUCCOR feature. Reported-by: William Roche Reviewed-by: Joao Martins Signed-off-by: John Allen Message-ID: <20240603193622.47156-3-john.allen@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 18 +++++++++++++++++- target/i386/cpu.h | 4 ++++ target/i386/kvm/kvm.c | 2 ++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 383230fa47..c5a532a254 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1180,6 +1180,22 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .tcg_features = TCG_APM_FEATURES, .unmigratable_flags = CPUID_APM_INVTSC, }, + [FEAT_8000_0007_EBX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, "succor", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000007, .reg = R_EBX, }, + .tcg_features = 0, + .unmigratable_flags = 0, + }, [FEAT_8000_0008_EBX] = { .type = CPUID_FEATURE_WORD, .feat_names = { @@ -6887,7 +6903,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x80000007: *eax = 0; - *ebx = 0; + *ebx = env->features[FEAT_8000_0007_EBX]; *ecx = 0; *edx = env->features[FEAT_8000_0007_EDX]; break; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index e6d5d1b483..6786055ec6 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -630,6 +630,7 @@ typedef enum FeatureWord { FEAT_7_1_EAX, /* CPUID[EAX=7,ECX=1].EAX */ FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ + FEAT_8000_0007_EBX, /* CPUID[8000_0007].EBX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ @@ -982,6 +983,9 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, /* Packets which contain IP payload have LIP values */ #define CPUID_14_0_ECX_LIP (1U << 31) +/* RAS Features */ +#define CPUID_8000_0007_EBX_SUCCOR (1U << 1) + /* CLZERO instruction */ #define CPUID_8000_0008_EBX_CLZERO (1U << 0) /* Always save/restore FP error pointers */ diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 55a9e8a70c..56d8e2a89e 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -532,6 +532,8 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, */ cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); ret |= cpuid_1_edx & 
CPUID_EXT2_AMD_ALIASES; + } else if (function == 0x80000007 && reg == R_EBX) { + ret |= CPUID_8000_0007_EBX_SUCCOR; } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) { /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't * be enabled without the in-kernel irqchip From 1ea1432199cdddbb4e7f98cee71cabf50a9516f2 Mon Sep 17 00:00:00 2001 From: John Allen Date: Mon, 3 Jun 2024 19:36:22 +0000 Subject: [PATCH 40/42] i386: Add support for overflow recovery Add cpuid bit definition for overflow recovery. This is needed in the case where a deferred error has been sent to the guest, a guest process accesses the poisoned memory, but the machine_check_poll function has not yet handled the original deferred error. If overflow recovery is not set in this case, when we handle the uncorrected error from the poisoned memory access, the overflow bit will be set and will result in the guest being shut down. By the time the MCE reaches the guest, the overflow has been handled by the host and has not caused a shutdown, so include the bit unconditionally. Signed-off-by: John Allen Message-ID: <20240603193622.47156-4-john.allen@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 2 +- target/i386/cpu.h | 1 + target/i386/kvm/kvm.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c5a532a254..7466217d5e 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1183,7 +1183,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_8000_0007_EBX] = { .type = CPUID_FEATURE_WORD, .feat_names = { - NULL, "succor", NULL, NULL, + "overflow-recov", "succor", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 6786055ec6..8fe28b67e0 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -984,6 +984,7 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, #define CPUID_14_0_ECX_LIP (1U << 31) /* RAS Features */ +#define CPUID_8000_0007_EBX_OVERFLOW_RECOV (1U << 0) #define CPUID_8000_0007_EBX_SUCCOR (1U << 1) /* CLZERO instruction */ diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 56d8e2a89e..912f5d5a6b 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -533,7 +533,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES; } else if (function == 0x80000007 && reg == R_EBX) { - ret |= CPUID_8000_0007_EBX_SUCCOR; + ret |= CPUID_8000_0007_EBX_OVERFLOW_RECOV | CPUID_8000_0007_EBX_SUCCOR; } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) { /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't * be enabled without the in-kernel irqchip From 1f97715c8390e582f154d8b579c70779bd8c9bdf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 9 Aug 2023 01:10:34 +0200 Subject: [PATCH 41/42] Revert "python: use vendored tomli" Now that Ubuntu 20.04 is not included anymore, there is no need to ship it as part of QEMU; Ubuntu 22.04 includes it and Leap users anyway need to install all the required dependencies from PyPI. This mostly reverts commit ec77ee7634de123b7c899739711000fd21dab68b, with just some changes to the wording. 
Signed-off-by: Paolo Bonzini --- configure | 4 ---- docs/devel/build-system.rst | 13 ++++++------- python/scripts/vendor.py | 3 --- python/wheels/tomli-2.0.1-py3-none-any.whl | Bin 12757 -> 0 bytes 4 files changed, 6 insertions(+), 14 deletions(-) delete mode 100644 python/wheels/tomli-2.0.1-py3-none-any.whl diff --git a/configure b/configure index 4d01a42ba6..5ad1674ca5 100755 --- a/configure +++ b/configure @@ -955,10 +955,6 @@ mkvenv="$python ${source_path}/python/scripts/mkvenv.py" # Finish preparing the virtual environment using vendored .whl files -if $python -c 'import sys; sys.exit(sys.version_info >= (3,11))'; then - $mkvenv ensure --dir "${source_path}/python/wheels" \ - 'tomli>=1.2.0' || exit 1 -fi $mkvenv ensuregroup --dir "${source_path}/python/wheels" \ ${source_path}/pythondeps.toml meson || exit 1 diff --git a/docs/devel/build-system.rst b/docs/devel/build-system.rst index 09caf2f8e1..f4fd76117d 100644 --- a/docs/devel/build-system.rst +++ b/docs/devel/build-system.rst @@ -185,14 +185,13 @@ Bundled Python packages Python packages that are **mandatory** dependencies to build QEMU, but are not available in all supported distros, are bundled with the -QEMU sources. Currently this includes Meson (outdated in CentOS 8 -and derivatives, Ubuntu 20.04 and 22.04, and openSUSE Leap) and tomli -(absent in Ubuntu 20.04). +QEMU sources. The only one is currently Meson (outdated in Ubuntu +22.04 and openSUSE Leap). -If you need to update these, please do so by modifying and rerunning -``python/scripts/vendor.py``. This script embeds the sha256 hash of -package sources and checks it. The pypi.org web site provides an easy -way to retrieve the sha256 hash of the sources. +In order to include a new or updated wheel, modify and rerun the +``python/scripts/vendor.py`` script. The script embeds the +sha256 hash of package sources and checks it. The pypi.org web site +provides an easy way to retrieve the sha256 hash of the sources. 
Stage 2: Meson diff --git a/python/scripts/vendor.py b/python/scripts/vendor.py index 1038b14ae0..07aff97cca 100755 --- a/python/scripts/vendor.py +++ b/python/scripts/vendor.py @@ -43,9 +43,6 @@ def main() -> int: packages = { "meson==1.2.3": "4533a43c34548edd1f63a276a42690fce15bde9409bcf20c4b8fa3d7e4d7cac1", - - "tomli==2.0.1": - "939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", } vendor_dir = Path(__file__, "..", "..", "wheels").resolve() diff --git a/python/wheels/tomli-2.0.1-py3-none-any.whl b/python/wheels/tomli-2.0.1-py3-none-any.whl deleted file mode 100644 index 29670b98d16e2bc770d4fea718582e1dc0dd8aca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12757 zcmZ{K19T?cvi6(2v8{=1TN67I+qN;W?TKyMwlT47dy-6?{B!QT=Y02k_dnfhuU_4& zpQrb(uBz4bbjeEt!O#Ez02H7}RYJMAoa`MF1OSNoGm!sWb+)sywqVfHv#_;r*3+Z6 zch`}hi0BtU>O7{2a9ah3PcNtq)hCA71L7L+!5ht;=`*8pC+Iw0f6veEbjEa6aZ!t{ z-7eZr5K1xTPCr1$E}2eui*cb3n04x0V575s9eHtd3AnP2al@G=Ow7|XTHLl>i@Uu~ zQ2;lqTW6Tfh`@q@wZc7Dx~3&*V^rA36cr>aDz~D#4UVmCEh*IfIByKl8D&D${Pjy<~={mz=uH0 z;OKxG#BXMAx=t8hwKuh0k+D=iu>QdmGLYQ_>$QTHK(gPU>jK%Hve4f5qUJ)GmUw5^ zilT(pi7l$g-h5`|Q5ZK@Iy|sLApdVx1nrKoh{6B>EYtx2q`$0aZ{X-;;`qmiL%gew z>k{$TZxNPH{>B-G()C>|lj^cyTE!*UBx4BSTcyxf1Z1UB>@D~ux!JK>x35$}xoWyMldsqH<45+0P?zA!=C zrmO~iH3qtRxp@te>kbb1y{B()f+NS~wt=ylY5e5Z@z!@|J%+wpvA{KvWSnT=G#FS9 zx`qzx-jZob`TJmBX$^<>dw!!_f*H6^ke z+osYFw-rsfaF|qDyxGvI*_|vQ^>NGWh1~?hFJ~V(sT=WuCT4KQ^}VQrwHrE4@PeNa zJ@3x2Uu>Q?ylKO2yADm5HTL2^D}yv6kP9_iN{F03?unuwnodf2mgb1uY)iSNRll9v zXZ+n&d*q&2W6FEl)Eim{s+XBAt(HCe9O@hsU^(M;|h~dZgYrt!2 zm7oomE|;Ut2;g?~{N3WDXo5e3_L<}k1wsCeS5kjtK)m@r-yOb8gRns_Fw#8rG zE=5nBoH$F!&665^%LM5GYn_l5||zSlb;T zPfBsV-1?-Wb_u-H@@SvC@GGa&q2iPKgvZp;{OlgV^!`OwS1b&0`>$x3p7%)<*zJbL zyFg7sq;WO;yuv~w%=ZDnrcdhcRqW|CsuNskql<)quW*4wBz1ujWy0xCtXouY zkK9448_4P&GU{B`4KWU*h^L;JB-;dc)>&p*u-T?wANcCm&Yl;-C!57t)h7*LhkX*z zoeYgBmWEI$_|=^}SH)Wa9m97qe53&sL>yT1`skfRvP6JgCO8P=41{4CBmzI6jst^K z8J8wzB2qYS^xfcbK>RBi^(N_+FWM0`hgMTxE zL|v$y=eVB49Qew9_W|LUFanUp!|!%7OaPh1&wro~!kV1zhD)C}-H0T!6uu^I?f^p|7ZDsKSgrTwNWJir;O)LV=?7bwZ~3-lAZrNHYo2k`4GU27 z`X04*5_Afhs%sN%;1Mmi0JX!e(AWEMbULIE@+6>(esH%$x}Jb(~vJWJ7@Qf{$44 z7e~`MjMB^Qul>&``bN2BBtbO&yYN&p;~W+Q)6}oI*a(bF=-i0mR1#y5_sE>#Sikl~ zgsA#kaDi~Ytf$zBvbkx!qnNJ+kr-bt=U1Q5>Qf?T7lSO?4Bigg1<{PF*CpLsb7x@a zf|0%Z_q(M-4E05Ho@`*^va_Y>ir^La2nE}xYHb~4UHl2hi&x<9ifsjhIC8A^1NA*P zae)d_0s_E`6^YE?dc<1pES|*%EB&&SHi+apoIDWuUbTU=qSOU|``o5SZlVu@|F)XX zFOYuW0pw-mOX2&3RFpBMCTF5f>al_lZRZ$LMLv0Uict{f9(-I}S(|zTk8K4*Fghfu zMW_@-9w6RFTX%*o4>EYs)IxqqPs}o9lvVF4i3j`^rG+hy$vB|#0>T<&#Gua^`S?$Q z-M%0Oy?aos(hIu zNyBgOO4B$lGAspX1-ABRdI1n)kREccqQQ%*&@pl4jt`vs3Pfr{utt-{gI7;tWIh8RC+#@megPSS`mU#fH7)V&_Opu`m;3Y82H>SSO@ zXwpwC!!D5^Bz`DbM1(GADdp+uudt_;GAk#5N0@?!sSyrGdTsU5^GW%1JUpST1Qx(( z=QqK@q*Z~C<*OjoQ-*Ufp0x%D4g2-iV(UQ&Xf_9#5#p3+EwO>w1<@vU&DYLpLNnIp zR8%VpfUUZFuqV_z3Yp&lWOTmr$vt$c9Wr%EkV}pj>+7q7dr$(Eq`-{f=f0tv z5TPhWfmG`}P=W_sTY4i>yd^?lZo!DC{wA~3p}x(pBLYmC#K3c{;ykf9Wx7}+XNhYd z!8GJk*-1VHc!UF-81|bQ8I+m7@0-ihnjwAvt(7tYYvArt1@9%Sdpy3ue)|f-JdYe^ zVrx~anRs*l63W}}f0EvAi`<|TProaOq;QZe7m!O3Z#Gh?)e<$9i}Voaj9=I@;y*g= zzzB(Q(74XIHs@v8$By_yK1a4nV2%P_|I<(N;5n7>E0yzLXc>OIfOJhHj<93{N0f;Z zm1VoE(ZmCJ1CHsE8tIS;n<@brB(3*@GKj9!L!m2Pz1it#IG()=hYT@Q-U9gYfg=9( z*0KhumaYq#jrr*mrP)u&8WZf0j6q(m7ilOV{eGC0nm^7sOxLlQO-Lom7y=uM4FCaaPhQVi1j8TY21d4uA-&zcAjg)WTa8$ zCbQG1WXx1bYxTQ{W@V{wy-ih>sw&T-;~v(O=RF!)JqqA;plP>jili1riMuqviFPv& zbXddoJQZxbkT z`^BTGlo-#6cq3B!Xa(4ic(TU#Ke|lib)r5#zXt0vhL$H##s?qMHJq$uiWUtq@_1C{ zNbouNw>boNqv4ff&!M6)Q$!5LPUJ&WBt5irMSS0)b%{ink)g-$V(%?|2tXozK)*GU 
zE)Ucf&-{(oF#SN7n%q+C&((i|)+ABgX#@lhSX%^g&>)mZUu}n{amF^tRPv7o@bT^% zwn+6G)S6p>sDTT7FB_KQfrBrxTgMEp-XgoIYc0BrF7B^ofthG9qG#CemO|7d_t?)w z0T))-K@nTua0;0BPlxs-T#cP{`6a~dh^NC2`s4wv=c2~LOvjR&0_HsF+Yp5Fv$;z1 zRsCiy)m7Zd*2tT^d86^Hz6Q<3N9kUF;PpdQK4Oj*n;4H_gh&UmI$vYLMp+Bxcdhyu z4KBnBDVdpQros#14&PFryt=ADdmxUD(4%oWv=*KWWbl|jA;xoa#C1?5KSdc^e6*NH z0addwfm6y_@<~y??luw8py(m0nJTJzEVhY~*P4rCi8#*9P^4YK+JrsKaFG+9FAd%BU-wlFb>s)bF!qtt<@iZ#a}4moI%LMZK`m z^|)dI{p7m70> z<0vjX*2>j$i-j`yf_TvIq|iYFn#!of$=$yPR<|ispw1X6@m2pQFg(J}c!r3uw_+Nh zD9T}9>tQZw|B^nrrTb->U)%?4--ANX$$BcnMrE7PgC|y>#jdzKVrQx-?2-&ex+BSw zRh8C9=@xY|zphKO(6v$HLgccs`s|KeT!|#)xGKULNx5>3{p>!{(dzA@i%ZN_az*#& zT6xebM&peVw1n}TkyKT;WqBKGO8&%`1{|lnOBTyxFA11BE{Zil`)jAQvS=Ze1OjJX zIT-so=E8%8IG>Axxu?~|I6gZj|1oES;L%Yk;{G|2;F_(%2#9BhyvigEV9_CcOL{UE$XCE&$PK|#9a9zpTt8lQ2SXi zBkJ|&33G7X=O|;X0kUHX__knsWVhxf`)j+~Y8x&d&-3+ecyndp`ve>^XzD(6SMzAb zkV5fXca+`C-b?TVj&7wvE|8GJT`9BY05ND<;IVlf?C=g>{zFq~htXow&FfCnsCll+ zo5nmtY(QIbMITq2kK<8_?5qLO#QrQE%HTjoQ6^7S2DKZ`UHM|Yk_@P2fTGnTQr|NL zRkuU0^v$oRr0RXrFV3?TGl{j)%o^=+-aIo7eC1^ zDsnz-957J?jq$ma#%@VxyYxA*wR6K`Xjj7!C!zc~x+7jVMpwu>bPzgH%gKDzJ^FQ@ zSO0n20f80WN?yt@SLmhJWufJ6HXhMvK{_kJ{>Da@lKUb~kD5|*TWqBVv#x8dbA^4^ ziu^y45qWwH2c9!$+wwy7lWNpm2?B4YQKo0o?kTuWv-gn7Ox6-Vf833#L7EvHb&JkF ze6xlvB;kVtDqYRSUtPN~zshd2l8}KDEv2(a4jHm-dLwhju{2rJi)+|`tN$k9u?@4& zAXlI&x5*YWwbw+dR<buC^u01$(^=7v-jq$HUTL>~hB&@A zV?jjCP=}o)YRcZ^=dvc5z-LurkCjuye5qEc%nPv=pttZ?cgS?rAuA6!n!GZno@Ea2 zIPI;6Ql)9D6L6^q(rb*RNz=`qkt~)K1$G^%60<4e=c{feFb>fM4{? z`%%qB@pWgP963UwH#DdZFLmAocN)D`Z$^7mGKJbxooC{ADx|8VJhc14&e%+WHs88; zw5X#!`3~!?e%0rKh$LD{XCh>WSHl2xLOj14dFMmEkaKg|3EYiMO>oa8x_FDsPUK`S zCMV%~Ue9@%ky1bkQSxR*rZxxJg2#Jlb$zAsCS|{h;8!+(`sG&+eYb0a)Ls z0-q_pjtWG>bSr9=+;n-QtZNQDzA{IYSS4=~mZ$}`yV#1kjvm~2sYQ{y4>pus)<3R)x1msCP z-Sp(Zx-3{OqEO$RFS6OwW-jfa2r=3XPcjKbH$u#$^!!rUZ&{H_tmkgT=Q0$QCY8$_}tD3an(x!31U=y}OT#05b zqkC>8%PX<;dl4%1B>N)9LA-T^C%3pX63S3zZjz^$=Z^{#7cC`G8YOMbu|!ZywIo4v z_wDM!F!TK`TG)f#>PfJ7_r*R@iAX8my|M^TaZc%dVgf@zvzy14b{J4}Pyx63F+y(J zzZ`vt>t@)7jJB~IA*|MvJwUa&ps;{7Iu%Kx=aVU(MyrPlQ#2B`IY_1{ETP7~g?UsV zF_@*A(w^WaHV^F22x~CK*sFJ?#T9xF`uzHcmPm2?xLS9GOtsC?VxO%al%oL||4E_M z4Z7;2b}pqfgqSmv{MtT$fns+Y_fCPooJe;&((!%dYfnJ|ZPs^R_q*RVMSZigvszCj zGj?7374=fOZwu+%A~e_(__}d0k>H%&6k!V5|8U zYq~?1$HOCePPgQL3=F%6S#ekwq3dRi6$`Hr$5Hl3bmK}uWCW4w=gSg>YkZP{bt#(J& zY!93i+r*@RkCBsRMx8it^aR^KJvrCFsiTtB+I79asvw zYmK6IJ?{h){Osi7PtN#}6nB4yQB&!=!-&ZnRM^>30&sJTl-AK%yNAbX_i$->wHU=g zsP?WN5tlT>2t#Kv`b>5)l;BmEo<_9tTX+nfu2(K)l5s$d`_S?7cxj_puukOG{KG zh}Dgcc!!(ktFBhn9g-+dTk=gSj?Vf$P!rZ~3yQ5uznyq>!s2vY=Ich}zLeI+OIoc1 z!ti!UY4%LmAYi(`UGsZ&=WyFY9ccS@bM_n^JU*W;1@n_(dOX9Fh73_UJm!x2> zH+lK5@dI|vXRnU?B+wVbb7cf5*C2$`@^>yac{l(G^Tm`$EFl<%4;#^^qClfb-)*{! zuA#oV&mwb3`S2|=4OIhx<*#W8Y_SNI#u$zI2&{1D1T7&HM+w`faj~mcSNe+~#Uqd{ zgPpftulU?hgj~cqnw)QlYjg9FkYEF43_gCyLwH+5?%;f%Z1k4AMQHcG;Ofv-uM|N; z8SCVQD2LF|g0cLVUXU@}sX-Iy1T)B?9?C!2(2m2{Zm^C3%8V$ZB8=r z(kKW{wPph7TUgPW%{?mm1TWE{=D=t23IyDP`31E?WLeP$&J|%!s&;keGS|6bSVH#; zg=Y9&pk(=_a*>k0k*1H=2QSflRMMDsxx}4jG*1fKAR^OKM|oup08ZJ#n_(AuAcChF z0<8uclEkc+Ri~{AJB5aCvWIoaLG3)2mIOJVT2*=~bSF`Ybavw&Jp@tAonz`Jl3q-C zO^pOX%+H93riid(KxMuj8a3|=?=2r)hK$`PAM)@XwX7I=H13!a3Xv+>8OR4fSa+kd zh?Y>Y93RIC^ghqFoAZhteNn1EhS(fzbdZ+1Q35<(ss{Lulnm z%??QF!ONlRv*~h%6^)Iu9qV2j_-uLNInnqmx^p&$bWeGBL7q35L+~x9!3~cMsl9t3 zpYf~dly5Pl5TCH_W|Dc;uXM)M)FD~FA}5bO$>ttyiS7aXd!A~zMll=ulb=5R48*_k zl(W0NiPN7{#TVUYIUoQha{Y=Ppp7?wgmpsHni|BeABGsbGf=Fm{iCVT++_5_o5SYR zW$R}KdS?4_>;sQCF@-jPm~jD3p6#yN$>tem3tH`u)TCmD^IKc36<(X{yvHbi{hkfm z!O@}*3YX!DqS2%af)R(>#FXm!D! 
zLBHzKS6pP?hExueZ`F6{weWlJx*w3oPRw}ap(gfrsjx3hyn8Tb?fBXhWrc%B-5~$I zVj3#I6Yigt;`|v1|5~xVJN-Y_Y^ekfOwUNqL~m^2UiwyV^9%D464n`F%z?2PlBn;Y_7>!U19+@k9ZT$CkXcs83E0A@Vb}VQ z`@8)25LajG!tjDrxgoN#FfT6$R5unlIAq#0-AWbZ$Esai^f90;)Q_a^_w_jWPKHy`fbZY`=C245zZT1-2b? zb?#bec_sK91(epPVO5;yLHHPp@x`pl%0GRu{To6;jA?y@)FzD&$2K8ns5rwGX>NEWe7;0pK5xq(ijkxGm+QxpfAo?U9z0Gk= zk=~?As<vB758PiEPB$nrF0kx4%1uhsNzslm)c=W% zFlBpy&v#J;F#v%7AEzQKsw^lXs4Uo};r)HS8Rv65M+i1W7}%812wc+!J*WJ_ zM>8Ycad~y`7-mEhuDZ?~zkxJ-W!BRp9&gk#sS2}xxhpU2v31}1^$gZ_kColw2~|yV z)Nr&pU+{)`>NyTO&5YO_@7}>#`_R2IS-5$G`NyUKMU5_QJ+o?h#SUb*wnUO;D9(k7 zDZR?*ow~8GD+(qr>V>
;mAt+>gV7*{n<(t#6)oNmB+~P1uzZMb~^P zMELjFliy`hQT#D)(L-cl!pjrrctw20N|@nrOv>8LM*r|w#r}DoNPc0ml5k{>D2T;< zlZ1HO`S!g7WyIM#_zek}8TFKt5NA}R2Kw}cY^LUk)w*uwXfnr8Ah%saos&`2<`*Nl z7Y-4w(TS+zo1oN<0N3pH0fu6yM&9<)MO2N5pl>k-yP}nx>&M5u31oR(z;DWxV9RuK z491!!Z~2(LW|DDv?|T-2Q=h|dXkRaHsN3T>L}jxKYg?JJ&@Nq696n5LzAw35lH?RO z9-Q%KeBRxUzh2EkYr129NrUA~WTv!UH{A#9+*i!o(fla7z_Kj8&xk%J6`0A9WpQ)A z4zeonOyJDAmB^XS7C;qcCg3`4Usj4ho$K)VRlR4)Rerj)wbc-G&E=@h9q^d@)UU6h z>aj)EhMM+EBwJjFy)(UQ%i2fMC0VzM_eqS)F6i0^26_UW-?4W}C)+wsgg^3L`-XNg zNWFif%PDqIVAhp%u~LoOYSB1Q>xR`TX5?Ja#{Th2l6QL_wAb?mm;!Dj(vBT}%I zwZSbXl#r{3JQldo2FiWMxvK!xgmutrpsB^Q?}EJ&uJV1y7sik-l6JPpESN?GC|m(` zogfDZYS_&y?x(WRClm9vyV~h};$UJ?2YfT)CQ85T_05@UO;SWemq4{7EcOt!Fy5(r z??=zdRpqe_r%7XLB;J;I9RB;Aj}!#xl}IE1%ZHxzSIoHLTb}8wiQvQtZRj0(W`55 zTurCYE!;Cz-OENYtUKlVZZ@RygV(u@9(FA7gX*U1F2p|JiC0)s7c`iCg>=AQ07eIJx znykjtY7moB_Uqtk;bkJu1U}T{KDy01i4z_SS{s&=*)(Xt^5Mr))MY~!5jL$kZU5dK zjIW<<%{MTr7wZn*XY+3Z)kV#w>z<x7U62Dacr{uVh2&PYRr>4-Trj&jy!9jbtu^9w4cF(L z;6f`+>vN7o;{}KR@!?rjFD;cP)1XV{?3g7~d22Ej3BQy6tVObof$^owi;p*ZEs>^=qE6>hy8ZG zj`35dSI-RuQjW6#B`rQSBOOHK<@TV%ex%J_1*-gq!!S(uC?n838Aeu=b`y>~{))>a zvQ6SBs-60<{S!@1fR!?MEv&T`rZFNqvwGS1I;na2;Km9MsbDgPQo+u7tJC2$^`j}Z zet!NN$IdmXu7@GA&2@s}6H8H{qqB67|LVzG|Aef$H8l*Wz(z$}h=!rUNXBUqD{iLY z0H;AirWFjoO_DI1XrZ=7Lz{MRX^16Rh+mjYr?Ce;4XcBQyHN9t%2i`Hqn;)VxmZ=0 zh5BsuYH0@6Ws+xsVvFKy31(--QmqId?_KdN?Q&6s3xp81G}tC6dT(ycJm5sUgGWp6 z+LlPO60)o5O+5S8V(Hf|siM0OMgNhNlS;}zjacX^b)@v8V63~tlACEmjBu7j2tN|2 zrnQVu5ZD<#lK3RPl~|?-j`=xsyl7a}7*MlndbbJi6tnv)P@h%xMUgp$J1nb`{oLBA z*;R(s8pq=RbBkB@E5Qyiexx@xKGKwS8xc&t#SUvLm!an(?}~P)N{nX7gI@!A8O941 znqaDnQ>Yyo{yL;_C}eoS40TG5k)G*3K1>IUCSoTq0izYk$g6NXNmS(z^(oUyrst5Q zX_c;0ibiENxH4Zi-S4QhC#W&BszB71bR+v^93cj72@*Hy$jV6=-i(ZE;QEr&k>UXx z@!9wO$gTj4^~s4J$bxp7{9~^Ql{|r0gkEbHBIG>K-^X zc7!p5bN||sM(=KzktjnL4(DJcYis-piLq zNPLO#cMA^sy|pl^Lc?RroLN^6y4C26>oo(t@llmu4m6K!lk8VU)GG2gf>VMfkFyp| zaKXVr$r)4*0S?Z-b<1k&3lfGNOI5Pa!Tt6GRO4%RF=%#n4m%DR=BI(2b zRB)`YJhIB*)tYmFSS^h|w$qvpzFet%(yhB9$8?tR3Bib}smzgDNx2}2AgxfGhHc7^ zq98^!ZIfK3N~z6f`_o9GZH8H*O0P!rb|7qrRaQNK=Z&nwvc2!Uhqc1$laoAWdFf*< zu{_^D!fvv3api;>Dbq0h@(lHDw}`;iH{`8g|SX^m5Fj8@8p+Q%J;9nOeJsxJuOKsVw=|#RK5wM5d~)4GeDZ772{j*a*!11h1GFFAIe*h(s(MVIPSF zgYmd31WP{u{K=f@JC#?ITW5TM`?jBwyyo)Vl9O$%qu1h1&0^ZYak;I3Z1b35udzhg zi^%os%h5Z^Ns5zW%_1h(JKyR_Zth4h#z0RvYBXV0%HUCS$*NWtydoltpiR!a9))LV zHHvUuJ_ROE5$h%eyk99}wUZT89eTZu-Q{_1`L^cNEftN9TIEs>bmJF}PziT1Z5bNc)s-I_RRZJ%W%AUXqR$uPXwEgfWQ(3>y-p4Knn8+>J(jUEqYLU`kXy zX@ize+q&cL1cD!ZOU#J$^yg-hmi<(Y59X{Z(ai-Prt#qi)~p^_@cC7;i7qp=(wSo3 zmD{t9tY=yhDh&9B3ciR?KVgJ8fxZD^Ga$A)$v|#c3!L0<-lH)-T<#xdhj(53$0O6( zm$6sYe62lBpXZPr@0&cnL7a_z%-s0iYreoorz|K~RM(Ln>jkC{xlbu)q7D(U;#!woH7`Wb2Z3uCn{=B+5eyRG` zcb}XeCmvW{y0H8B6C1SA@`u+m<@X=q1K}{V?>yb zo|ADht7g~oP!TfE&;6_k`H(}^3GQ}w2lV>^S}HRS6nJGT%!*bdd1Nb=cSzCwR%THm)7cA)Z+%Yv2^Y{gW`K0j zjvn=hL^7&3($U39I_6t|O+Sg$Pj1AnoyKyms7GQMq38md(?B#$oi^4Q=HE~r}`mV}@`6f`+ zqJ%ra(VV6}CT*n_Tj^B;jOz7GwcUIV-YXHZ8agY;n?K9i{TU~8ZjtSgj~Um`{d5S8 zj9%6`GFBA>_^lME-%*3t9(jjM?4Hn^6Y&$;Ovx=BHKT2RRkwJ)SUuC4)GL2kh(m{Q zR+%$3j=W3sedUA&VTJ+5v!FemkW#K;Mww)%g}I-PH`ZC*j#ZQG*h;?M=%vJ0y{wz< zsA7-KPi#A^T)&Rjs!q;%{)v)nam^789rMMCOX68LPkY4ayrA{jybcFL!J&#Am|h&l zQAuQv83?(v&mk`j0*VIw&u#X9Ui5!$h5r9I{GX=#zr+9DU;f`<0HDxc?2qgG5By)< z=6^^3UE}`?4Z!*r`o9$aza#&yy8VSL{o||u^W1-GaDNB?UBLPaoc>3L`fu<*<*dJB z|1Mhnh28rn_W#7l|43N>o9h3kCj5&EB$4ocrtyEM{!3f Date: Tue, 8 Aug 2023 10:07:55 +0200 Subject: [PATCH 42/42] python: mkvenv: remove ensure command This was used to bootstrap the venv with a TOML parser, after which ensuregroup is used. 
Now that we expect it to be present as a system package (either tomli or, for Python 3.11, tomllib), it is not needed anymore. Note that this means that, when implemented, the hypothetical "isolated" mode that does not use any system packages will only work with Python 3.11+. Signed-off-by: Paolo Bonzini --- python/scripts/mkvenv.py | 105 --------------------------------------- 1 file changed, 105 deletions(-) diff --git a/python/scripts/mkvenv.py b/python/scripts/mkvenv.py index d0b9c215ca..f2526af0a0 100644 --- a/python/scripts/mkvenv.py +++ b/python/scripts/mkvenv.py @@ -13,7 +13,6 @@ Commands: create create a venv post_init post-venv initialization - ensure Ensure that the specified package is installed. ensuregroup Ensure that the specified package group is installed. @@ -36,18 +35,6 @@ options: -------------------------------------------------- -usage: mkvenv ensure [-h] [--online] [--dir DIR] dep_spec... - -positional arguments: - dep_spec PEP 508 Dependency specification, e.g. 'meson>=0.61.5' - -options: - -h, --help show this help message and exit - --online Install packages from PyPI, if necessary. - --dir DIR Path to vendored packages where we may install from. - --------------------------------------------------- - usage: mkvenv ensuregroup [-h] [--online] [--dir DIR] file group... positional arguments: @@ -726,57 +713,6 @@ def _do_ensure( return None -def ensure( - dep_specs: Sequence[str], - online: bool = False, - wheels_dir: Optional[Union[str, Path]] = None, - prog: Optional[str] = None, -) -> None: - """ - Use pip to ensure we have the package specified by @dep_specs. - - If the package is already installed, do nothing. If online and - wheels_dir are both provided, prefer packages found in wheels_dir - first before connecting to PyPI. - - :param dep_specs: - PEP 508 dependency specifications. e.g. ['meson>=0.61.5']. - :param online: If True, fall back to PyPI. - :param wheels_dir: If specified, search this path for packages. - :param prog: - If specified, use this program name for error diagnostics that will - be presented to the user. e.g., 'sphinx-build' can be used as a - bellwether for the presence of 'sphinx'. - """ - - if not HAVE_DISTLIB: - raise Ouch("a usable distlib could not be found, please install it") - - # Convert the depspecs to a dictionary, as if they came - # from a section in a pythondeps.toml file - group: Dict[str, Dict[str, str]] = {} - for spec in dep_specs: - name = distlib.version.LegacyMatcher(spec).name - group[name] = {} - - spec = spec.strip() - pos = len(name) - ver = spec[pos:].strip() - if ver: - group[name]["accepted"] = ver - - if prog: - group[name]["canary"] = prog - prog = None - - result = _do_ensure(group, online, wheels_dir) - if result: - # Well, that's not good. - if result[1]: - raise Ouch(result[0]) - raise SystemExit(f"\n{result[0]}\n\n") - - def _parse_groups(file: str) -> Dict[str, Dict[str, Any]]: if not HAVE_TOMLLIB: if sys.version_info < (3, 11): @@ -888,39 +824,6 @@ def _add_ensuregroup_subcommand(subparsers: Any) -> None: ) -def _add_ensure_subcommand(subparsers: Any) -> None: - subparser = subparsers.add_parser( - "ensure", help="Ensure that the specified package is installed." 
- ) - subparser.add_argument( - "--online", - action="store_true", - help="Install packages from PyPI, if necessary.", - ) - subparser.add_argument( - "--dir", - type=str, - action="store", - help="Path to vendored packages where we may install from.", - ) - subparser.add_argument( - "--diagnose", - type=str, - action="store", - help=( - "Name of a shell utility to use for " - "diagnostics if this command fails." - ), - ) - subparser.add_argument( - "dep_specs", - type=str, - action="store", - help="PEP 508 Dependency specification, e.g. 'meson>=0.61.5'", - nargs="+", - ) - - def main() -> int: """CLI interface to make_qemu_venv. See module docstring.""" if os.environ.get("DEBUG") or os.environ.get("GITLAB_CI"): @@ -944,7 +847,6 @@ def main() -> int: _add_create_subcommand(subparsers) _add_post_init_subcommand(subparsers) - _add_ensure_subcommand(subparsers) _add_ensuregroup_subcommand(subparsers) args = parser.parse_args() @@ -957,13 +859,6 @@ def main() -> int: ) if args.command == "post_init": post_venv_setup() - if args.command == "ensure": - ensure( - dep_specs=args.dep_specs, - online=args.online, - wheels_dir=args.dir, - prog=args.diagnose, - ) if args.command == "ensuregroup": ensure_group( file=args.file,