diff --git a/hw/core/machine.c b/hw/core/machine.c index c93d249244..655d75c21f 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -36,6 +36,7 @@ GlobalProperty hw_compat_9_0[] = { {"arm-cpu", "backcompat-cntfrq", "true" }, + {"scsi-disk-base", "migrate-emulated-scsi-request", "false" }, {"vfio-pci", "skip-vsc-check", "false" }, }; const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0); diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c index 0812d39c02..a67092db6a 100644 --- a/hw/scsi/scsi-disk.c +++ b/hw/scsi/scsi-disk.c @@ -114,6 +114,7 @@ struct SCSIDiskState { * 0xffff - reserved */ uint16_t rotation_rate; + bool migrate_emulated_scsi_request; }; static void scsi_free_request(SCSIRequest *req) @@ -162,6 +163,15 @@ static void scsi_disk_save_request(QEMUFile *f, SCSIRequest *req) } } +static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + if (s->migrate_emulated_scsi_request) { + scsi_disk_save_request(f, req); + } +} + static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) { SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req); @@ -185,6 +195,15 @@ static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req) qemu_iovec_init_external(&r->qiov, &r->iov, 1); } +static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req) +{ + SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev); + + if (s->migrate_emulated_scsi_request) { + scsi_disk_load_request(f, req); + } +} + /* * scsi_handle_rw_error has two return values. False means that the error * must be ignored, true means that the error has been processed and the @@ -2606,6 +2625,8 @@ static const SCSIReqOps scsi_disk_emulate_reqops = { .read_data = scsi_disk_emulate_read_data, .write_data = scsi_disk_emulate_write_data, .get_buf = scsi_get_buf, + .load_request = scsi_disk_emulate_load_request, + .save_request = scsi_disk_emulate_save_request, }; static const SCSIReqOps scsi_disk_dma_reqops = { @@ -3114,7 +3135,8 @@ static const TypeInfo scsi_disk_base_info = { DEFINE_PROP_STRING("serial", SCSIDiskState, serial), \ DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor), \ DEFINE_PROP_STRING("product", SCSIDiskState, product), \ - DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id) + DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id), \ + DEFINE_PROP_BOOL("migrate-emulated-scsi-request", SCSIDiskState, migrate_emulated_scsi_request, true) static Property scsi_hd_properties[] = { diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 7466217d5e..365852cb99 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6455,10 +6455,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (*eax & 31) { int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); - if (cores_per_pkg > 1) { - *eax &= ~0xFC000000; - *eax |= max_core_ids_in_package(&topo_info) << 26; - } + *eax &= ~0xFC000000; + *eax |= max_core_ids_in_package(&topo_info) << 26; if (host_vcpus_per_cache > threads_per_pkg) { *eax &= ~0x3FFC000; diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 8fe28b67e0..7e2a9b56ae 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1260,6 +1260,8 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, /* Use a clearer name for this. 
*/ #define CPU_INTERRUPT_INIT CPU_INTERRUPT_RESET +#define CC_OP_HAS_EFLAGS(op) ((op) >= CC_OP_EFLAGS && (op) <= CC_OP_ADCOX) + /* Instead of computing the condition codes after each x86 instruction, * QEMU just stores one operand (called CC_SRC), the result * (called CC_DST) and the type of operation (called CC_OP). When the @@ -1270,6 +1272,9 @@ uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, typedef enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ CC_OP_EFLAGS, /* all cc are explicitly computed, CC_SRC = flags */ + CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */ + CC_OP_ADOX, /* CC_SRC2 = O, CC_SRC = rest. */ + CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */ CC_OP_MULW, @@ -1326,10 +1331,6 @@ typedef enum { CC_OP_BMILGL, CC_OP_BMILGQ, - CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest. */ - CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest. */ - CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest. */ - CC_OP_CLR, /* Z set, all other flags clear. */ CC_OP_POPCNT, /* Z via CC_SRC, all other flags clear. */ diff --git a/target/i386/helper.h b/target/i386/helper.h index 2f46cffabd..eeb8df56ea 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -95,7 +95,7 @@ DEF_HELPER_FLAGS_2(monitor, TCG_CALL_NO_WG, void, env, tl) DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int) DEF_HELPER_1(rdmsr, void, env) DEF_HELPER_1(wrmsr, void, env) -DEF_HELPER_FLAGS_2(read_crN, TCG_CALL_NO_RWG, tl, env, int) +DEF_HELPER_FLAGS_1(read_cr8, TCG_CALL_NO_RWG, tl, env) DEF_HELPER_FLAGS_3(write_crN, TCG_CALL_NO_RWG, void, env, int, tl) #endif /* !CONFIG_USER_ONLY */ diff --git a/target/i386/sev.c b/target/i386/sev.c index 004c667ac1..30b83f1d77 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -587,6 +587,7 @@ static SevCapability *sev_get_capabilities(Error **errp) sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_common) { error_setg(errp, "SEV is not configured"); + return NULL; } sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", @@ -1529,11 +1530,12 @@ int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + SevCommonStateClass *klass; if (!sev_common) { return 0; } + klass = SEV_COMMON_GET_CLASS(sev_common); /* if SEV is in update state then encrypt the data else do nothing */ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { @@ -1710,7 +1712,9 @@ void sev_es_set_reset_vector(CPUState *cpu) { X86CPU *x86; CPUX86State *env; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + SevCommonState *sev_common = SEV_COMMON( + object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON)); /* Only update if we have valid reset information */ if (!sev_common || !sev_common->reset_data_valid) { @@ -2165,6 +2169,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; gsize len; + finish->id_block_en = 0; g_free(sev_snp_guest->id_block); g_free((guchar *)finish->id_block_uaddr); @@ -2184,7 +2189,7 @@ sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) return; } - finish->id_block_en = (len) ? 
1 : 0; + finish->id_block_en = 1; } static char * diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index c2d8da8d14..0d846c32c2 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -151,6 +151,8 @@ X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) #define X86_OP_GROUPw(op, op0, s0, ...) \ X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__) +#define X86_OP_GROUPwr(op, op0, s0, op1, s1, ...) \ + X86_OP_GROUP3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__) #define X86_OP_GROUP0(op, ...) \ X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__) @@ -180,20 +182,20 @@ #define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...) \ X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__) #define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...) \ - X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__) + X86_OP_ENTRY3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__) #define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \ X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) #define X86_OP_ENTRYw(op, op0, s0, ...) \ X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__) #define X86_OP_ENTRYr(op, op0, s0, ...) \ - X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__) + X86_OP_ENTRY3(op, None, None, op0, s0, None, None, ## __VA_ARGS__) #define X86_OP_ENTRY1(op, op0, s0, ...) \ X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__) #define X86_OP_ENTRY0(op, ...) \ X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__) #define cpuid(feat) .cpuid = X86_FEAT_##feat, -#define noseg .special = X86_SPECIAL_NoSeg, +#define nolea .special = X86_SPECIAL_NoLoadEA, #define xchg .special = X86_SPECIAL_Locked, #define lock .special = X86_SPECIAL_HasLock, #define mmx .special = X86_SPECIAL_MMX, @@ -221,7 +223,9 @@ #define vex13 .vex_class = 13, #define chk(a) .check = X86_CHECK_##a, -#define svm(a) .intercept = SVM_EXIT_##a, +#define chk2(a, b) .check = X86_CHECK_##a | X86_CHECK_##b, +#define chk3(a, b, c) .check = X86_CHECK_##a | X86_CHECK_##b | X86_CHECK_##c, +#define svm(a) .intercept = SVM_EXIT_##a, .has_intercept = true, #define avx2_256 .vex_special = X86_VEX_AVX2_256, @@ -267,20 +271,41 @@ static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEnt static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) { - /* only includes ldmxcsr and stmxcsr, because they have AVX variants. 
*/ static const X86OpEntry group15_reg[8] = { + [0] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3), + [1] = X86_OP_ENTRYw(RDxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3), + [2] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), + [3] = X86_OP_ENTRYr(WRxxBASE, R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0), + [5] = X86_OP_ENTRY0(LFENCE, cpuid(SSE2) p_00), + [6] = X86_OP_ENTRY0(MFENCE, cpuid(SSE2) p_00), + [7] = X86_OP_ENTRY0(SFENCE, cpuid(SSE2) p_00), }; static const X86OpEntry group15_mem[8] = { - [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128)), - [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128)), + [0] = X86_OP_ENTRYw(FXSAVE, M,y, cpuid(FXSR) p_00), + [1] = X86_OP_ENTRYr(FXRSTOR, M,y, cpuid(FXSR) p_00), + [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128) p_00), + [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128) p_00), + [4] = X86_OP_ENTRYw(XSAVE, M,y, cpuid(XSAVE) p_00), + [5] = X86_OP_ENTRYr(XRSTOR, M,y, cpuid(XSAVE) p_00), + [6] = X86_OP_ENTRYw(XSAVEOPT, M,b, cpuid(XSAVEOPT) p_00), + [7] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLFLUSH) p_00), + }; + + static const X86OpEntry group15_mem_66[8] = { + [6] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLWB)), + [7] = X86_OP_ENTRYw(NOP, M,b, cpuid(CLFLUSHOPT)), }; uint8_t modrm = get_modrm(s, env); + int op = (modrm >> 3) & 7; + if ((modrm >> 6) == 3) { - *entry = group15_reg[(modrm >> 3) & 7]; + *entry = group15_reg[op]; + } else if (s->prefix & PREFIX_DATA) { + *entry = group15_mem_66[op]; } else { - *entry = group15_mem[(modrm >> 3) & 7]; + *entry = group15_mem[op]; } } @@ -425,6 +450,50 @@ static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui *entry = *decode_by_prefix(s, opcodes_0F7F); } +static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry popcnt = + X86_OP_ENTRYwr(POPCNT, G,v, E,v, cpuid(POPCNT) zextT0); + + if (s->prefix & PREFIX_REPZ) { + *entry = popcnt; + } else { + memset(entry, 0, sizeof(*entry)); + } +} + +static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + /* For BSF, pass 2op as the third operand so that we can use zextT0 */ + static const X86OpEntry opcodes_0FBC[4] = { + X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0), + X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0), /* 0x66 */ + X86_OP_ENTRYwr(TZCNT, G,v, E,v, zextT0), /* 0xf3 */ + X86_OP_ENTRY3(BSF, G,v, E,v, 2op,v, zextT0), /* 0xf2 */ + }; + if (!(s->cpuid_ext3_features & CPUID_EXT3_ABM)) { + *entry = opcodes_0FBC[0]; + } else { + *entry = *decode_by_prefix(s, opcodes_0FBC); + } +} + +static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + /* For BSR, pass 2op as the third operand so that we can use zextT0 */ + static const X86OpEntry opcodes_0FBD[4] = { + X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0), + X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0), /* 0x66 */ + X86_OP_ENTRYwr(LZCNT, G,v, E,v, zextT0), /* 0xf3 */ + X86_OP_ENTRY3(BSR, G,v, E,v, 2op,v, zextT0), /* 0xf2 */ + }; + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { + *entry = opcodes_0FBD[0]; + } else { + *entry = *decode_by_prefix(s, opcodes_0FBD); + } +} + static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) { static const X86OpEntry movq[4] = { @@ -612,15 +681,15 @@ static const X86OpEntry opcodes_0F38_00toEF[240] = { /* five rows for no prefix, 66, F3, F2, 66+F2 */ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { [0] = { - X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)), - 
X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)), + X86_OP_ENTRYwr(MOVBE, G,y, M,y, cpuid(MOVBE)), + X86_OP_ENTRYwr(MOVBE, G,w, M,w, cpuid(MOVBE)), {}, X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), }, [1] = { - X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)), - X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)), + X86_OP_ENTRYwr(MOVBE, M,y, G,y, cpuid(MOVBE)), + X86_OP_ENTRYwr(MOVBE, M,w, G,w, cpuid(MOVBE)), {}, X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)), X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)), @@ -633,7 +702,7 @@ static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { {}, }, [3] = { - X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)), + X86_OP_GROUP3(group17, B,y, None,None, E,y, vex13 cpuid(BMI1)), {}, {}, {}, @@ -985,14 +1054,30 @@ static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, ui *entry = *decode_by_prefix(s, opcodes_0FE6); } -static const X86OpEntry opcodes_0F[256] = { - [0x0E] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */ +/* + * These ignore the mod bits (assume (modrm&0xc0)==0xc0), so group the + * pre-decode tweak here for all MOVs from/to CR and DR. + * + * AMD documentation (24594.pdf) and testing of Intel 386 and 486 + * processors all show that the mod bits are assumed to be 1's, + * regardless of actual values. + */ +static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ /* - * 3DNow!'s opcode byte comes *after* modrm and displacements, making it - * more like an Ib operand. Dispatch to the right helper in a single gen_* - * function. */ - [0x0F] = X86_OP_ENTRY3(3dnow, P,q, Q,q, I,b, cpuid(3DNOW)), + get_modrm(s, env); + s->modrm |= 0xC0; + + entry->gen = gen_MOV; +} + +static const X86OpEntry opcodes_0F[256] = { + [0x02] = X86_OP_ENTRYwr(LAR, G,v, E,w, chk(prot)), + [0x03] = X86_OP_ENTRYwr(LSL, G,v, E,w, chk(prot)), + [0x05] = X86_OP_ENTRY0(SYSCALL, chk(o64_intel)), + [0x06] = X86_OP_ENTRY0(CLTS, chk(cpl0) svm(WRITE_CR0)), + [0x07] = X86_OP_ENTRY0(SYSRET, chk3(o64_intel, prot, cpl0)), [0x10] = X86_OP_GROUP0(0F10), [0x11] = X86_OP_GROUP0(0F11), @@ -1004,6 +1089,22 @@ static const X86OpEntry opcodes_0F[256] = { /* Incorrectly listed as Mq,Vq in the manual */ [0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex5 p_00_66), + /* + * Incorrectly listed as using "d" operand type in the manual. In reality + * there's no 16-bit version (like y) and it does not use REX.W (like d64). 
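+     * In 64-bit mode the move is always 64 bits and elsewhere always 32 bits,
+     * which is exactly what the new y_d64 operand size encodes.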
+ */ + [0x20] = X86_OP_GROUPwr(MOV_CR_DR, R,y_d64, C,y_d64, chk(cpl0) svm(READ_CR0)), + [0x21] = X86_OP_GROUPwr(MOV_CR_DR, R,y_d64, D,y_d64, chk(cpl0) svm(READ_DR0)), + [0x22] = X86_OP_GROUPwr(MOV_CR_DR, C,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_CR0)), + [0x23] = X86_OP_GROUPwr(MOV_CR_DR, D,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_DR0)), + + [0x30] = X86_OP_ENTRY0(WRMSR, chk(cpl0)), + [0x31] = X86_OP_ENTRY0(RDTSC), + [0x32] = X86_OP_ENTRY0(RDMSR, chk(cpl0)), + [0x33] = X86_OP_ENTRY0(RDPMC), + [0x34] = X86_OP_ENTRY0(SYSENTER, chk2(i64_amd, prot_or_vm86)), + [0x35] = X86_OP_ENTRY0(SYSEXIT, chk3(i64_amd, prot, cpl0)), + [0x40] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), [0x41] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), [0x42] = X86_OP_ENTRY2(CMOVcc, G,v, E,v, cpuid(CMOV)), @@ -1060,9 +1161,64 @@ static const X86OpEntry opcodes_0F[256] = { [0xa0] = X86_OP_ENTRYr(PUSH, FS, w), [0xa1] = X86_OP_ENTRYw(POP, FS, w), + [0xa2] = X86_OP_ENTRY0(CPUID), + [0xa4] = X86_OP_ENTRY4(SHLD, E,v, 2op,v, G,v), + [0xa5] = X86_OP_ENTRY3(SHLD, E,v, 2op,v, G,v), + [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock), + [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock), + [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None), + [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None), + [0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None), + [0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */ + [0xb7] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, zextT0), /* MOVZX */ + + [0xc0] = X86_OP_ENTRY2(XADD, E,b, G,b, lock), + [0xc1] = X86_OP_ENTRY2(XADD, E,v, G,v, lock), + [0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0xc3] = X86_OP_ENTRY3(MOV, EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */ + [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66), + [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66), + [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66), + + [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), + [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd3] = X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd4] = X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd5] = X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd6] = X86_OP_GROUP0(0FD6), + [0xd7] = X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx2_256 p_00_66), + + [0xe0] = X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe1] = X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xe2] = X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xe3] = X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe4] = X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe5] = X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe6] = X86_OP_GROUP0(0FE6), + [0xe7] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */ + + [0xf0] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex4_unal cpuid(SSE3) p_f2), /* LDDQU */ + [0xf1] = X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf2] = X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf3] = X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf4] = X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf5] = X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx2_256 
p_00_66), + [0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66), + + [0x08] = X86_OP_ENTRY0(NOP, svm(INVD)), + [0x09] = X86_OP_ENTRY0(NOP, svm(WBINVD)), [0x0b] = X86_OP_ENTRY0(UD), /* UD2 */ [0x0d] = X86_OP_ENTRY1(NOP, M,v), /* 3DNow! prefetch */ + [0x0e] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */ + /* + * 3DNow!'s opcode byte comes *after* modrm and displacements, making it + * more like an Ib operand. Dispatch to the right helper in a single gen_* + * function. + */ + [0x0f] = X86_OP_ENTRY3(3dnow, P,q, Q,q, I,b, cpuid(3DNOW)), [0x18] = X86_OP_ENTRY1(NOP, nop,v), /* prefetch/reserved NOP */ [0x19] = X86_OP_ENTRY1(NOP, nop,v), /* reserved NOP */ @@ -1137,6 +1293,9 @@ static const X86OpEntry opcodes_0F[256] = { [0xa8] = X86_OP_ENTRYr(PUSH, GS, w), [0xa9] = X86_OP_ENTRYw(POP, GS, w), + [0xaa] = X86_OP_ENTRY0(RSM, chk(smm) svm(RSM)), + [0xac] = X86_OP_ENTRY4(SHRD, E,v, 2op,v, G,v), + [0xad] = X86_OP_ENTRY3(SHRD, E,v, 2op,v, G,v), [0xae] = X86_OP_GROUP0(group15), /* * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3 @@ -1144,23 +1303,14 @@ static const X86OpEntry opcodes_0F[256] = { */ [0xaf] = X86_OP_ENTRY3(IMUL3, G,v, E,v, 2op,v, sextT0), - [0xb2] = X86_OP_ENTRY3(LSS, G,v, EM,p, None, None), - [0xb4] = X86_OP_ENTRY3(LFS, G,v, EM,p, None, None), - [0xb5] = X86_OP_ENTRY3(LGS, G,v, EM,p, None, None), - [0xb6] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, zextT0), /* MOVZX */ - [0xb7] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, zextT0), /* MOVZX */ - + [0xb8] = X86_OP_GROUP0(0FB8), /* decoded as modrm, which is visible as a difference between page fault and #UD */ [0xb9] = X86_OP_ENTRYr(UD, nop,v), /* UD1 */ + [0xbc] = X86_OP_GROUP0(0FBC), + [0xbd] = X86_OP_GROUP0(0FBD), [0xbe] = X86_OP_ENTRY3(MOV, G,v, E,b, None, None, sextT0), /* MOVSX */ [0xbf] = X86_OP_ENTRY3(MOV, G,v, E,w, None, None, sextT0), /* MOVSX */ - [0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), - [0xc3] = X86_OP_ENTRY3(MOV, EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */ - [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66), - [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66), - [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66), - [0xc8] = X86_OP_ENTRY1(BSWAP, LoBits,y), [0xc9] = X86_OP_ENTRY1(BSWAP, LoBits,y), [0xca] = X86_OP_ENTRY1(BSWAP, LoBits,y), @@ -1170,33 +1320,6 @@ static const X86OpEntry opcodes_0F[256] = { [0xce] = X86_OP_ENTRY1(BSWAP, LoBits,y), [0xcf] = X86_OP_ENTRY1(BSWAP, LoBits,y), - [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), - [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xd3] = X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xd4] = X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xd5] = X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xd6] = X86_OP_GROUP0(0FD6), - [0xd7] = X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx2_256 p_00_66), - - [0xe0] = X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xe1] = X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), - [0xe2] = X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), - [0xe3] = X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xe4] = X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - 
[0xe5] = X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xe6] = X86_OP_GROUP0(0FE6), - [0xe7] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */ - - [0xf0] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex4_unal cpuid(SSE3) p_f2), /* LDDQU */ - [0xf1] = X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), - [0xf2] = X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), - [0xf3] = X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), - [0xf4] = X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xf5] = X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), - [0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66), - /* Incorrectly missing from 2-17 */ [0xd8] = X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), [0xd9] = X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), @@ -1335,9 +1458,9 @@ static void decode_group4_5(DisasContext *s, CPUX86State *env, X86OpEntry *entry /* 0xff */ [0x08] = X86_OP_ENTRY1(INC, E,v, lock), [0x09] = X86_OP_ENTRY1(DEC, E,v, lock), - [0x0a] = X86_OP_ENTRY3(CALL_m, None, None, E,f64, None, None, zextT0), + [0x0a] = X86_OP_ENTRYr(CALL_m, E,f64, zextT0), [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p), - [0x0c] = X86_OP_ENTRY3(JMP_m, None, None, E,f64, None, None, zextT0), + [0x0c] = X86_OP_ENTRYr(JMP_m, E,f64, zextT0), [0x0d] = X86_OP_ENTRYr(JMPF_m, M,p), [0x0e] = X86_OP_ENTRYr(PUSH, E,f64), }; @@ -1586,18 +1709,18 @@ static const X86OpEntry opcodes_root[256] = { [0x7E] = X86_OP_ENTRYr(Jcc, J,b), [0x7F] = X86_OP_ENTRYr(Jcc, J,b), - [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None), - [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None), - [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None), - [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None), - /* Missing in Table A-2: memory destination is always 16-bit. */ - [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None, op0_Mw), - [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg), - [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,w, None, None), + [0x88] = X86_OP_ENTRYwr(MOV, E,b, G,b), + [0x89] = X86_OP_ENTRYwr(MOV, E,v, G,v), + [0x8A] = X86_OP_ENTRYwr(MOV, G,b, E,b), + [0x8B] = X86_OP_ENTRYwr(MOV, G,v, E,v), + /* Missing in Table A-2: memory destination is always 16-bit. */ + [0x8C] = X86_OP_ENTRYwr(MOV, E,v, S,w, op0_Mw), + [0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, nolea), + [0x8E] = X86_OP_ENTRYwr(MOV, S,w, E,w), [0x8F] = X86_OP_GROUPw(group1A, E,v), [0x98] = X86_OP_ENTRY1(CBW, 0,v), /* rAX */ - [0x99] = X86_OP_ENTRY3(CWD, 2,v, 0,v, None, None), /* rDX, rAX */ + [0x99] = X86_OP_ENTRYwr(CWD, 2,v, 0,v), /* rDX, rAX */ [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)), [0x9B] = X86_OP_ENTRY0(WAIT), [0x9C] = X86_OP_ENTRY0(PUSHF, chk(vm86_iopl) svm(PUSHF)), @@ -1607,22 +1730,22 @@ static const X86OpEntry opcodes_root[256] = { [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b), /* AL, Ib */ [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z), /* rAX, Iz */ - [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None), - [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None), + [0xAA] = X86_OP_ENTRYwr(STOS, Y,b, 0,b), + [0xAB] = X86_OP_ENTRYwr(STOS, Y,v, 0,v), /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS. 
*/ [0xAC] = X86_OP_ENTRYr(LODS, X,b), [0xAD] = X86_OP_ENTRYr(LODS, X,v), [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b), [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v), - [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), - [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), - [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), - [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), - [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), - [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), - [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), - [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None), + [0xB8] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), + [0xB9] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), + [0xBA] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), + [0xBB] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), + [0xBC] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), + [0xBD] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), + [0xBE] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), + [0xBF] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v), [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b), [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64), @@ -1725,6 +1848,10 @@ static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot = s->dflag == MO_16 ? MO_32 : s->dflag; return true; + case X86_SIZE_y_d64: /* Full (not 16-bit) register access */ + *ot = CODE64(s) ? MO_64 : MO_32; + return true; + case X86_SIZE_z: /* 16-bit for 16-bit operand size, else 32-bit */ *ot = s->dflag == MO_16 ? MO_16 : MO_32; return true; @@ -1802,11 +1929,34 @@ static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, case X86_TYPE_C: /* REG in the modrm byte selects a control register */ op->unit = X86_OP_CR; - goto get_reg; + op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s); + if (op->n == 0 && (s->prefix & PREFIX_LOCK) && + (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) { + op->n = 8; + s->prefix &= ~PREFIX_LOCK; + } + if (op->n != 0 && op->n != 2 && op->n != 3 && op->n != 4 && op->n != 8) { + return false; + } + if (decode->e.intercept) { + decode->e.intercept += op->n; + } + break; case X86_TYPE_D: /* REG in the modrm byte selects a debug register */ op->unit = X86_OP_DR; - goto get_reg; + op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s); + if (op->n >= 8) { + /* + * illegal opcode. The DR4 and DR5 case is checked in the generated + * code instead, to save on hflags bits. 
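+                 * (With CR4.DE=0, DR4/DR5 alias DR6/DR7; with CR4.DE=1 they
+                 * raise #UD, hence the run-time check in the helper.)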
+ */ + return false; + } + if (decode->e.intercept) { + decode->e.intercept += op->n; + } + break; case X86_TYPE_G: /* REG in the modrm byte selects a GPR */ op->unit = X86_OP_INT; @@ -2047,6 +2197,10 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return true; case X86_FEAT_CMOV: return (s->cpuid_features & CPUID_CMOV); + case X86_FEAT_CLFLUSH: + return (s->cpuid_features & CPUID_CLFLUSH); + case X86_FEAT_FXSR: + return (s->cpuid_features & CPUID_FXSR); case X86_FEAT_F16C: return (s->cpuid_ext_features & CPUID_EXT_F16C); case X86_FEAT_FMA: @@ -2055,6 +2209,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return (s->cpuid_ext_features & CPUID_EXT_MOVBE); case X86_FEAT_PCLMULQDQ: return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ); + case X86_FEAT_POPCNT: + return (s->cpuid_ext_features & CPUID_EXT_POPCNT); case X86_FEAT_SSE: return (s->cpuid_features & CPUID_SSE); case X86_FEAT_SSE2: @@ -2080,6 +2236,8 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) case X86_FEAT_AVX: return (s->cpuid_ext_features & CPUID_EXT_AVX); + case X86_FEAT_XSAVE: + return (s->cpuid_ext_features & CPUID_EXT_XSAVE); case X86_FEAT_3DNOW: return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW); @@ -2094,11 +2252,20 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2); case X86_FEAT_AVX2: return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2); + case X86_FEAT_CLFLUSHOPT: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT); + case X86_FEAT_CLWB: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB); + case X86_FEAT_FSGSBASE: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE); case X86_FEAT_SHA_NI: return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI); case X86_FEAT_CMPCCXADD: return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD); + + case X86_FEAT_XSAVEOPT: + return (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT); } g_assert_not_reached(); } @@ -2428,18 +2595,12 @@ static void disas_insn(DisasContext *s, CPUState *cpu) if (b == 0x0f) { b = x86_ldub_code(env, s); switch (b) { - case 0x00 ... 0x03: /* mostly privileged instructions */ - case 0x05 ... 0x09: + case 0x00 ... 0x01: /* mostly privileged instructions */ case 0x1a ... 0x1b: /* MPX */ - case 0x20 ... 0x23: /* mov from/to CR and DR */ - case 0x30 ... 0x35: /* more privileged instructions */ - case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */ - case 0xaa ... 0xae: /* RSM, SHRD, grp15 */ - case 0xb0 ... 0xb1: /* cmpxchg */ + case 0xa3: /* bt */ + case 0xab: /* bts */ case 0xb3: /* btr */ - case 0xb8: /* integer ops */ - case 0xba ... 0xbd: /* integer ops */ - case 0xc0 ... 0xc1: /* xadd */ + case 0xba ... 0xbb: /* grp8, btc */ case 0xc7: /* grp9 */ disas_insn_old(s, cpu, b + 0x100); return; @@ -2466,18 +2627,28 @@ static void disas_insn(DisasContext *s, CPUState *cpu) /* Checks that result in #UD come first. 
*/ if (decode.e.check) { - if (decode.e.check & X86_CHECK_i64) { - if (CODE64(s)) { + if (CODE64(s)) { + if (decode.e.check & X86_CHECK_i64) { + goto illegal_op; + } + if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { + goto illegal_op; + } + } else { + if (decode.e.check & X86_CHECK_o64) { + goto illegal_op; + } + if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { goto illegal_op; } } - if (decode.e.check & X86_CHECK_o64) { - if (!CODE64(s)) { + if (decode.e.check & X86_CHECK_prot_or_vm86) { + if (!PE(s)) { goto illegal_op; } } - if (decode.e.check & X86_CHECK_prot) { - if (!PE(s) || VM86(s)) { + if (decode.e.check & X86_CHECK_no_vm86) { + if (VM86(s)) { goto illegal_op; } } @@ -2524,11 +2695,6 @@ static void disas_insn(DisasContext *s, CPUState *cpu) assert(decode.op[1].unit == X86_OP_INT); break; - case X86_SPECIAL_NoSeg: - decode.mem.def_seg = -1; - s->override = -1; - break; - case X86_SPECIAL_Op0_Mw: assert(decode.op[0].unit == X86_OP_INT); if (decode.op[0].has_ea) { @@ -2556,19 +2722,21 @@ static void disas_insn(DisasContext *s, CPUState *cpu) * exceptions if there is no memory operand). Exceptions are * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!). * - * RSM and XSETBV will be handled in the gen_* functions - * instead of using chk(). + * XSETBV will check for CPL0 in the gen_* function instead of using chk(). */ if (decode.e.check & X86_CHECK_cpl0) { if (CPL(s) != 0) { goto gp_fault; } } - if (decode.e.intercept && unlikely(GUEST(s))) { + if (decode.e.has_intercept && unlikely(GUEST(s))) { gen_helper_svm_check_intercept(tcg_env, tcg_constant_i32(decode.e.intercept)); } if (decode.e.check) { + if ((decode.e.check & X86_CHECK_smm) && !(s->flags & HF_SMM_MASK)) { + goto illegal_op; + } if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) { if (IOPL(s) < 3) { goto gp_fault; @@ -2585,12 +2753,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) gen_helper_enter_mmx(tcg_env); } - if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) { + if (decode.e.special != X86_SPECIAL_NoLoadEA && + (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) { gen_load_ea(s, &decode.mem, decode.e.vex_class == 12); } if (s->prefix & PREFIX_LOCK) { gen_load(s, &decode, 2, s->T1); - decode.e.gen(s, env, &decode); + decode.e.gen(s, &decode); } else { if (decode.op[0].unit == X86_OP_MMX) { compute_mmx_offset(&decode.op[0]); @@ -2599,12 +2768,12 @@ static void disas_insn(DisasContext *s, CPUState *cpu) } gen_load(s, &decode, 1, s->T0); gen_load(s, &decode, 2, s->T1); - decode.e.gen(s, env, &decode); + decode.e.gen(s, &decode); gen_writeback(s, &decode, 0, s->T0); } /* - * Write back flags after last memory access. Some newer ALU instructions, as + * Write back flags after last memory access. Some older ALU instructions, as * well as SSE instructions, write flags in the gen_* function, but that can * cause incorrect tracking of CC_OP for instructions that write to both memory * and flags. 
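 */

The X86_CHECK_i64_amd and X86_CHECK_o64_intel bits introduced below encode the
vendor-dependent cases: SYSCALL/SYSRET are 64-bit-only on Intel but also valid
in legacy protected mode on AMD, while SYSENTER/SYSEXIT work in long mode on
Intel but raise #UD there on AMD. As a rough standalone sketch of the test
order used in disas_insn() above (mode_checks_fault and struct ctx are
illustrative stand-ins, not QEMU API):

#include <stdbool.h>

enum {
    CHECK_i64       = 1 << 0,   /* invalid in 64-bit mode */
    CHECK_o64       = 1 << 1,   /* only valid in 64-bit mode */
    CHECK_i64_amd   = 1 << 11,  /* invalid in 64-bit mode, but only on AMD */
    CHECK_o64_intel = 1 << 12,  /* 64-bit only, but only on Intel */
};

struct ctx {
    bool code64;        /* stands in for CODE64(s) */
    bool vendor_intel;  /* env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 */
};

/* True if the instruction must raise #UD for mode/vendor reasons. */
static bool mode_checks_fault(const struct ctx *s, unsigned check)
{
    if (s->code64) {
        return (check & CHECK_i64) ||
               ((check & CHECK_i64_amd) && !s->vendor_intel);
    }
    return (check & CHECK_o64) ||
           ((check & CHECK_o64_intel) && s->vendor_intel);
}

With this split, SYSRET only needs its chk3(o64_intel, prot, cpl0) annotation
and SYSENTER only chk2(i64_amd, prot_or_vm86); no per-vendor special casing is
left in the generator functions.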
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 1f90cf9640..f9bf9a6041 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -90,6 +90,7 @@ typedef enum X86OpSize { X86_SIZE_w, /* 16-bit */ X86_SIZE_x, /* 128/256-bit, based on operand size */ X86_SIZE_y, /* 32/64-bit, based on operand size */ + X86_SIZE_y_d64, /* 32/64-bit, based on 64-bit mode */ X86_SIZE_z, /* 16-bit for 16-bit operand size, else 32-bit */ X86_SIZE_z_f64, /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */ @@ -108,12 +109,18 @@ typedef enum X86CPUIDFeature { X86_FEAT_AVX2, X86_FEAT_BMI1, X86_FEAT_BMI2, + X86_FEAT_CLFLUSH, + X86_FEAT_CLFLUSHOPT, + X86_FEAT_CLWB, X86_FEAT_CMOV, X86_FEAT_CMPCCXADD, X86_FEAT_F16C, X86_FEAT_FMA, + X86_FEAT_FSGSBASE, + X86_FEAT_FXSR, X86_FEAT_MOVBE, X86_FEAT_PCLMULQDQ, + X86_FEAT_POPCNT, X86_FEAT_SHA_NI, X86_FEAT_SSE, X86_FEAT_SSE2, @@ -122,6 +129,8 @@ typedef enum X86CPUIDFeature { X86_FEAT_SSE41, X86_FEAT_SSE42, X86_FEAT_SSE4A, + X86_FEAT_XSAVE, + X86_FEAT_XSAVEOPT, } X86CPUIDFeature; /* Execution flags */ @@ -142,8 +151,8 @@ typedef enum X86InsnCheck { X86_CHECK_i64 = 1, X86_CHECK_o64 = 2, - /* Fault outside protected mode */ - X86_CHECK_prot = 4, + /* Fault in vm86 mode */ + X86_CHECK_no_vm86 = 4, /* Privileged instruction checks */ X86_CHECK_cpl0 = 8, @@ -159,6 +168,17 @@ typedef enum X86InsnCheck { /* Fault if VEX.W=0 */ X86_CHECK_W1 = 256, + + /* Fault outside protected mode, possibly including vm86 mode */ + X86_CHECK_prot_or_vm86 = 512, + X86_CHECK_prot = X86_CHECK_prot_or_vm86 | X86_CHECK_no_vm86, + + /* Fault outside SMM */ + X86_CHECK_smm = 1024, + + /* Vendor-specific checks for Intel/AMD differences */ + X86_CHECK_i64_amd = 2048, + X86_CHECK_o64_intel = 4096, } X86InsnCheck; typedef enum X86InsnSpecial { @@ -170,8 +190,9 @@ typedef enum X86InsnSpecial { /* Always locked if it has a memory operand (XCHG) */ X86_SPECIAL_Locked, - /* Do not apply segment base to effective address */ - X86_SPECIAL_NoSeg, + /* Do not load effective address in s->A0 */ + X86_SPECIAL_NoLoadEA, + /* * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits * (and writeback zero-extends it to 64 bits if applicable). PREFIX_DATA @@ -245,7 +266,7 @@ typedef struct X86DecodedInsn X86DecodedInsn; typedef void (*X86DecodeFunc)(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b); /* Code generation function. */ -typedef void (*X86GenFunc)(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode); +typedef void (*X86GenFunc)(DisasContext *s, X86DecodedInsn *decode); struct X86OpEntry { /* Based on the is_decode flags. 
*/ @@ -271,6 +292,7 @@ struct X86OpEntry { unsigned valid_prefix:16; unsigned check:16; unsigned intercept:8; + bool has_intercept:1; bool is_decode:1; }; diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc index 4be3d9a6fb..11faa70b5e 100644 --- a/target/i386/tcg/emit.c.inc +++ b/target/i386/tcg/emit.c.inc @@ -60,8 +60,8 @@ typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even, TCGv_i32 odd); -static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode); -static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode); +static void gen_JMP_m(DisasContext *s, X86DecodedInsn *decode); +static void gen_JMP(DisasContext *s, X86DecodedInsn *decode); static inline TCGv_i32 tcg_constant8u_i32(uint8_t val) { @@ -242,12 +242,20 @@ static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v) tcg_gen_ld32u_tl(v, tcg_env, offsetof(CPUX86State,segs[op->n].selector)); break; +#ifndef CONFIG_USER_ONLY case X86_OP_CR: - tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n])); + if (op->n == 8) { + translator_io_start(&s->base); + gen_helper_read_cr8(v, tcg_env); + } else { + tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n])); + } break; case X86_OP_DR: - tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, dr[op->n])); + /* CR4.DE tested in the helper. */ + gen_helper_get_dr(v, tcg_env, tcg_constant_i32(op->n)); break; +#endif case X86_OP_INT: if (op->has_ea) { if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) { @@ -343,8 +351,20 @@ static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv 16, 16, 0); } break; +#ifndef CONFIG_USER_ONLY case X86_OP_CR: + if (op->n == 8) { + translator_io_start(&s->base); + } + gen_helper_write_crN(tcg_env, tcg_constant_i32(op->n), v); + s->base.is_jmp = DISAS_EOB_NEXT; + break; case X86_OP_DR: + /* CR4.DE tested in the helper. */ + gen_helper_set_dr(tcg_env, tcg_constant_i32(op->n), v); + s->base.is_jmp = DISAS_EOB_NEXT; + break; +#endif default: g_assert_not_reached(); } @@ -446,7 +466,7 @@ static const SSEFunc_0_epp fns_3dnow[] = { [0xbf] = gen_helper_pavgusb, }; -static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_3dnow(DisasContext *s, X86DecodedInsn *decode) { uint8_t b = decode->immediate; SSEFunc_0_epp fn = b < ARRAY_SIZE(fns_3dnow) ? 
fns_3dnow[b] : NULL; @@ -479,7 +499,7 @@ static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) * f3 = v*ss Vss, Hss, Wps * f2 = v*sd Vsd, Hsd, Wps */ -static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_unary_fp_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_epp pd_xmm, SSEFunc_0_epp ps_xmm, SSEFunc_0_epp pd_ymm, SSEFunc_0_epp ps_ymm, SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) @@ -504,9 +524,9 @@ static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86Decode } } #define UNARY_FP_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_unary_fp_sse(s, env, decode, \ + gen_unary_fp_sse(s, decode, \ gen_helper_##lname##pd_xmm, \ gen_helper_##lname##ps_xmm, \ gen_helper_##lname##pd_ymm, \ @@ -522,7 +542,7 @@ UNARY_FP_SSE(VSQRT, sqrt) * f3 = v*ss Vss, Hss, Wps * f2 = v*sd Vsd, Hsd, Wps */ -static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_fp_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm, SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm, SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) @@ -543,9 +563,9 @@ static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn } #define FP_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_fp_sse(s, env, decode, \ + gen_fp_sse(s, decode, \ gen_helper_##lname##pd_xmm, \ gen_helper_##lname##ps_xmm, \ gen_helper_##lname##pd_ymm, \ @@ -561,7 +581,7 @@ FP_SSE(VDIV, div) FP_SSE(VMAX, max) #define FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, even, odd) \ -static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname##Px(DisasContext *s, X86DecodedInsn *decode) \ { \ SSEFunc_0_eppppii xmm = s->vex_w ? gen_helper_fma4pd_xmm : gen_helper_fma4ps_xmm; \ SSEFunc_0_eppppii ymm = s->vex_w ? gen_helper_fma4pd_ymm : gen_helper_fma4ps_ymm; \ @@ -574,7 +594,7 @@ static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *d #define FMA_SSE(uname, ptr0, ptr1, ptr2, flags) \ FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, flags, flags) \ -static void gen_##uname##Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname##Sx(DisasContext *s, X86DecodedInsn *decode) \ { \ SSEFunc_0_eppppi fn = s->vex_w ? gen_helper_fma4sd : gen_helper_fma4ss; \ \ @@ -607,10 +627,10 @@ FMA_SSE_PACKED(VFMSUBADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0, float_muladd_negate_c FMA_SSE_PACKED(VFMSUBADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0, float_muladd_negate_c) #define FP_UNPACK_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ /* PS maps to the DQ integer instruction, PD maps to QDQ. 
*/ \ - gen_fp_sse(s, env, decode, \ + gen_fp_sse(s, decode, \ gen_helper_##lname##qdq_xmm, \ gen_helper_##lname##dq_xmm, \ gen_helper_##lname##qdq_ymm, \ @@ -624,7 +644,7 @@ FP_UNPACK_SSE(VUNPCKHPx, punpckh) * 00 = v*ps Vps, Wpd * f3 = v*ss Vss, Wps */ -static inline void gen_unary_fp32_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_unary_fp32_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_epp ps_xmm, SSEFunc_0_epp ps_ymm, SSEFunc_0_eppp ss) @@ -649,9 +669,9 @@ illegal_op: gen_illegal_opcode(s); } #define UNARY_FP32_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_unary_fp32_sse(s, env, decode, \ + gen_unary_fp32_sse(s, decode, \ gen_helper_##lname##ps_xmm, \ gen_helper_##lname##ps_ymm, \ gen_helper_##lname##ss); \ @@ -663,7 +683,7 @@ UNARY_FP32_SSE(VRCP, rcp) * 66 = v*pd Vpd, Hpd, Wpd * f2 = v*ps Vps, Hps, Wps */ -static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_horizontal_fp_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm, SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm) { @@ -674,9 +694,9 @@ static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86D fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); } #define HORIZONTAL_FP_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_horizontal_fp_sse(s, env, decode, \ + gen_horizontal_fp_sse(s, decode, \ gen_helper_##lname##pd_xmm, gen_helper_##lname##ps_xmm, \ gen_helper_##lname##pd_ymm, gen_helper_##lname##ps_ymm); \ } @@ -684,7 +704,7 @@ HORIZONTAL_FP_SSE(VHADD, hadd) HORIZONTAL_FP_SSE(VHSUB, hsub) HORIZONTAL_FP_SSE(VADDSUB, addsub) -static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_ternary_sse(DisasContext *s, X86DecodedInsn *decode, int op3, SSEFunc_0_epppp xmm, SSEFunc_0_epppp ymm) { SSEFunc_0_epppp fn = s->vex_l ? 
ymm : xmm; @@ -695,21 +715,21 @@ static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86Decoded fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3); } #define TERNARY_SSE(uname, uvname, lname) \ -static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uvname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4, \ + gen_ternary_sse(s, decode, (uint8_t)decode->immediate >> 4, \ gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \ } \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_ternary_sse(s, env, decode, 0, \ + gen_ternary_sse(s, decode, 0, \ gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \ } TERNARY_SSE(BLENDVPS, VBLENDVPS, blendvps) TERNARY_SSE(BLENDVPD, VBLENDVPD, blendvpd) TERNARY_SSE(PBLENDVB, VPBLENDVB, pblendvb) -static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_binary_imm_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_epppi xmm, SSEFunc_0_epppi ymm) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); @@ -721,9 +741,9 @@ static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86Deco } #define BINARY_IMM_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_binary_imm_sse(s, env, decode, \ + gen_binary_imm_sse(s, decode, \ gen_helper_##lname##_xmm, \ gen_helper_##lname##_ymm); \ } @@ -739,7 +759,7 @@ BINARY_IMM_SSE(PCLMULQDQ, pclmulqdq) #define UNARY_INT_GVEC(uname, func, ...) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ int vec_len = vector_len(s, decode); \ \ @@ -757,7 +777,7 @@ UNARY_INT_GVEC(VPBROADCASTQ, tcg_gen_gvec_dup_mem, MO_64) #define BINARY_INT_GVEC(uname, func, ...) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ int vec_len = vector_len(s, decode); \ \ @@ -816,7 +836,7 @@ BINARY_INT_GVEC(PXOR, tcg_gen_gvec_xor, MO_64) * These are really the same encoding, because 1) V is the same as P when VEX.V * is not present 2) P and Q are the same as H and W apart from MM/XMM */ -static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_binary_int_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_eppp mmx, SSEFunc_0_eppp xmm, SSEFunc_0_eppp ymm) { assert(!!mmx == !!(decode->e.special == X86_SPECIAL_MMX)); @@ -837,9 +857,9 @@ static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86Deco #define BINARY_INT_MMX(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_binary_int_sse(s, env, decode, \ + gen_binary_int_sse(s, decode, \ gen_helper_##lname##_mmx, \ gen_helper_##lname##_xmm, \ gen_helper_##lname##_ymm); \ @@ -886,9 +906,9 @@ BINARY_INT_MMX(PMULHRSW, pmulhrsw) /* Instructions with no MMX equivalent. 
*/ #define BINARY_INT_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_binary_int_sse(s, env, decode, \ + gen_binary_int_sse(s, decode, \ NULL, \ gen_helper_##lname##_xmm, \ gen_helper_##lname##_ymm); \ @@ -911,7 +931,7 @@ BINARY_INT_SSE(VAESENC, aesenc) BINARY_INT_SSE(VAESENCLAST, aesenclast) #define UNARY_CMP_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ if (!s->vex_l) { \ gen_helper_##lname##_xmm(tcg_env, OP_PTR1, OP_PTR2); \ @@ -924,7 +944,7 @@ UNARY_CMP_SSE(VPTEST, ptest) UNARY_CMP_SSE(VTESTPS, vtestps) UNARY_CMP_SSE(VTESTPD, vtestpd) -static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_unary_int_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_epp xmm, SSEFunc_0_epp ymm) { if (!s->vex_l) { @@ -935,9 +955,9 @@ static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86Decod } #define UNARY_INT_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_unary_int_sse(s, env, decode, \ + gen_unary_int_sse(s, decode, \ gen_helper_##lname##_xmm, \ gen_helper_##lname##_ymm); \ } @@ -969,7 +989,7 @@ UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq) UNARY_INT_SSE(VCVTPH2PS, cvtph2ps) -static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_unary_imm_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_ppi xmm, SSEFunc_0_ppi ymm) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); @@ -981,9 +1001,9 @@ static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86Decod } #define UNARY_IMM_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_unary_imm_sse(s, env, decode, \ + gen_unary_imm_sse(s, decode, \ gen_helper_##lname##_xmm, \ gen_helper_##lname##_ymm); \ } @@ -996,7 +1016,7 @@ UNARY_IMM_SSE(VPERMQ, vpermq) UNARY_IMM_SSE(VPERMILPS_i, vpermilps_imm) UNARY_IMM_SSE(VPERMILPD_i, vpermilpd_imm) -static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_unary_imm_fp_sse(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_eppi xmm, SSEFunc_0_eppi ymm) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); @@ -1008,9 +1028,9 @@ static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86De } #define UNARY_IMM_FP_SSE(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_unary_imm_fp_sse(s, env, decode, \ + gen_unary_imm_fp_sse(s, decode, \ gen_helper_##lname##_xmm, \ gen_helper_##lname##_ymm); \ } @@ -1018,7 +1038,7 @@ static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod UNARY_IMM_FP_SSE(VROUNDPS, roundps) UNARY_IMM_FP_SSE(VROUNDPD, roundpd) -static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_vexw_avx(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_eppp d_xmm, SSEFunc_0_eppp q_xmm, SSEFunc_0_eppp d_ymm, SSEFunc_0_eppp q_ymm) { @@ 
-1030,9 +1050,9 @@ static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedIns /* VEX.W affects whether to operate on 32- or 64-bit elements. */ #define VEXW_AVX(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_vexw_avx(s, env, decode, \ + gen_vexw_avx(s, decode, \ gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \ gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \ } @@ -1042,7 +1062,7 @@ VEXW_AVX(VPSRAV, vpsrav) VEXW_AVX(VPMASKMOV, vpmaskmov) /* Same as above, but with extra arguments to the helper. */ -static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, +static inline void gen_vsib_avx(DisasContext *s, X86DecodedInsn *decode, SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q_xmm, SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q_ymm) { @@ -1066,29 +1086,29 @@ static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedIns } } #define VSIB_AVX(uname, lname) \ -static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +static void gen_##uname(DisasContext *s, X86DecodedInsn *decode) \ { \ - gen_vsib_avx(s, env, decode, \ + gen_vsib_avx(s, decode, \ gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \ gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \ } VSIB_AVX(VPGATHERD, vpgatherd) VSIB_AVX(VPGATHERQ, vpgatherq) -static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_AAA(DisasContext *s, X86DecodedInsn *decode) { gen_update_cc_op(s); gen_helper_aaa(tcg_env); assume_cc_op(s, CC_OP_EFLAGS); } -static void gen_AAD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_AAD(DisasContext *s, X86DecodedInsn *decode) { gen_helper_aad(s->T0, s->T0, s->T1); prepare_update1_cc(decode, s, CC_OP_LOGICB); } -static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_AAM(DisasContext *s, X86DecodedInsn *decode) { if (decode->immediate == 0) { gen_exception(s, EXCP00_DIVZ); @@ -1098,14 +1118,14 @@ static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) } } -static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_AAS(DisasContext *s, X86DecodedInsn *decode) { gen_update_cc_op(s); gen_helper_aas(tcg_env); assume_cc_op(s, CC_OP_EFLAGS); } -static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_ADC(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; TCGv c_in = tcg_temp_new(); @@ -1122,24 +1142,41 @@ static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update3_cc(decode, s, CC_OP_ADCB + ot, c_in); } -/* ADCX/ADOX do not have memory operands and can use set_cc_op. */ -static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) +static void gen_ADCOX(DisasContext *s, X86DecodedInsn *decode, int cc_op) { - int opposite_cc_op; + MemOp ot = decode->op[0].ot; TCGv carry_in = NULL; - TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2); + TCGv *carry_out = (cc_op == CC_OP_ADCX ? &decode->cc_dst : &decode->cc_src2); TCGv zero; - if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { - /* Re-use the carry-out from a previous round. */ - carry_in = carry_out; - } else { - /* We don't have a carry-in, get it out of EFLAGS. 
*/ - if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { - gen_compute_eflags(s); + decode->cc_op = cc_op; + *carry_out = tcg_temp_new(); + if (CC_OP_HAS_EFLAGS(s->cc_op)) { + decode->cc_src = cpu_cc_src; + + /* Re-use the carry-out from a previous round? */ + if (s->cc_op == cc_op || s->cc_op == CC_OP_ADCOX) { + carry_in = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2); } - carry_in = s->tmp0; - tcg_gen_extract_tl(carry_in, cpu_cc_src, + + /* Preserve the opposite carry from previous rounds? */ + if (s->cc_op != cc_op && s->cc_op != CC_OP_EFLAGS) { + decode->cc_op = CC_OP_ADCOX; + if (carry_out == &decode->cc_dst) { + decode->cc_src2 = cpu_cc_src2; + } else { + decode->cc_dst = cpu_cc_dst; + } + } + } else { + decode->cc_src = tcg_temp_new(); + gen_mov_eflags(s, decode->cc_src); + } + + if (!carry_in) { + /* Get carry_in out of EFLAGS. */ + carry_in = tcg_temp_new(); + tcg_gen_extract_tl(carry_in, decode->cc_src, ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1); } @@ -1151,31 +1188,23 @@ static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) tcg_gen_ext32u_tl(s->T1, s->T1); tcg_gen_add_i64(s->T0, s->T0, s->T1); tcg_gen_add_i64(s->T0, s->T0, carry_in); - tcg_gen_shri_i64(carry_out, s->T0, 32); + tcg_gen_shri_i64(*carry_out, s->T0, 32); break; #endif default: zero = tcg_constant_tl(0); - tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero); - tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); + tcg_gen_add2_tl(s->T0, *carry_out, s->T0, zero, carry_in, zero); + tcg_gen_add2_tl(s->T0, *carry_out, s->T0, *carry_out, s->T1, zero); break; } - - opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX; - if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { - /* Merge with the carry-out from the opposite instruction. 
*/ - set_cc_op(s, CC_OP_ADCOX); - } else { - set_cc_op(s, cc_op); - } } -static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_ADCX(DisasContext *s, X86DecodedInsn *decode) { - gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX); + gen_ADCOX(s, decode, CC_OP_ADCX); } -static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_ADD(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; @@ -1188,12 +1217,12 @@ static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update2_cc(decode, s, CC_OP_ADDB + ot); } -static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_ADOX(DisasContext *s, X86DecodedInsn *decode) { - gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX); + gen_ADCOX(s, decode, CC_OP_ADOX); } -static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_AND(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[1].ot; @@ -1206,7 +1235,7 @@ static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } -static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_ANDN(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; @@ -1214,7 +1243,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } -static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_ARPL(DisasContext *s, X86DecodedInsn *decode) { TCGv zf = tcg_temp_new(); TCGv flags = tcg_temp_new(); @@ -1235,7 +1264,7 @@ static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) decode->cc_op = CC_OP_EFLAGS; } -static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_BEXTR(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); @@ -1263,43 +1292,37 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } -/* BLSI do not have memory operands and can use set_cc_op. */ -static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_BLSI(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_neg_tl(s->T1, s->T0); + /* input in T1, which is ready for prepare_update2_cc */ + tcg_gen_neg_tl(s->T0, s->T1); tcg_gen_and_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - set_cc_op(s, CC_OP_BMILGB + ot); + prepare_update2_cc(decode, s, CC_OP_BMILGB + ot); } -/* BLSMSK do not have memory operands and can use set_cc_op. */ -static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_BLSMSK(DisasContext *s, X86DecodedInsn *decode) { MemOp ot = decode->op[0].ot; - tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_subi_tl(s->T1, s->T0, 1); + /* input in T1, which is ready for prepare_update2_cc */ + tcg_gen_subi_tl(s->T0, s->T1, 1); tcg_gen_xor_tl(s->T0, s->T0, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - set_cc_op(s, CC_OP_BMILGB + ot); + prepare_update2_cc(decode, s, CC_OP_BMILGB + ot); } -/* BLSR do not have memory operands and can use set_cc_op. 
@@ -1206,7 +1235,7 @@ static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ANDN(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -1214,7 +1243,7 @@ static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ARPL(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv zf = tcg_temp_new();
     TCGv flags = tcg_temp_new();
@@ -1235,7 +1264,7 @@ static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     decode->cc_op = CC_OP_EFLAGS;
 }
 
-static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BEXTR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
@@ -1263,43 +1292,37 @@ static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-/* BLSI do not have memory operands and can use set_cc_op.  */
-static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSI(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
-    tcg_gen_mov_tl(cpu_cc_src, s->T0);
-    tcg_gen_neg_tl(s->T1, s->T0);
+    /* input in T1, which is ready for prepare_update2_cc  */
+    tcg_gen_neg_tl(s->T0, s->T1);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-/* BLSMSK do not have memory operands and can use set_cc_op.  */
-static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSMSK(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
-    tcg_gen_mov_tl(cpu_cc_src, s->T0);
-    tcg_gen_subi_tl(s->T1, s->T0, 1);
+    /* input in T1, which is ready for prepare_update2_cc  */
+    tcg_gen_subi_tl(s->T0, s->T1, 1);
     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-/* BLSR do not have memory operands and can use set_cc_op.  */
-static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
-    tcg_gen_mov_tl(cpu_cc_src, s->T0);
-    tcg_gen_subi_tl(s->T1, s->T0, 1);
+    /* input in T1, which is ready for prepare_update2_cc  */
+    tcg_gen_subi_tl(s->T0, s->T1, 1);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 op = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(op, s->T0);
@@ -1310,7 +1333,48 @@ static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output.  */
+static void gen_BSF(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* Only the Z bit is defined and it is related to the input.  */
+    decode->cc_dst = tcg_temp_new();
+    decode->cc_op = CC_OP_LOGICB + ot;
+    tcg_gen_mov_tl(decode->cc_dst, s->T0);
+
+    /*
+     * The manual says that the output is undefined when the
+     * input is zero, but real hardware leaves it unchanged, and
+     * real programs appear to depend on that.  Accomplish this
+     * by passing the output as the value to return upon zero.
+     */
+    tcg_gen_ctz_tl(s->T0, s->T0, s->T1);
+}
+
+/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output.  */
+static void gen_BSR(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* Only the Z bit is defined and it is related to the input.  */
+    decode->cc_dst = tcg_temp_new();
+    decode->cc_op = CC_OP_LOGICB + ot;
+    tcg_gen_mov_tl(decode->cc_dst, s->T0);
+
+    /*
+     * The manual says that the output is undefined when the
+     * input is zero, but real hardware leaves it unchanged, and
+     * real programs appear to depend on that.  Accomplish this
+     * by passing the output as the value to return upon zero.
+     * Plus, return the bit index of the first 1 bit.
+     */
+    tcg_gen_xori_tl(s->T1, s->T1, TARGET_LONG_BITS - 1);
+    tcg_gen_clz_tl(s->T0, s->T0, s->T1);
+    tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
+}
+
+static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
 {
 #ifdef TARGET_X86_64
     if (s->dflag == MO_64) {
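The XOR pair in gen_BSR above leans on a small identity: TARGET_LONG_BITS - 1 is an all-ones mask, so XOR-ing the leading-zero count with it is the same as subtracting it from the mask, turning clz into the index of the most significant set bit. A hedged sketch for 64 bits (function name illustrative; like BSR itself, undefined for a zero input, which the patch handles by returning the old destination):

static unsigned bsr64_index(uint64_t x)
{
    /* 63 ^ clz == 63 - clz, because 63 is an all-ones 6-bit mask */
    return 63 ^ __builtin_clzll(x);
}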
@@ -1321,7 +1385,7 @@ static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ);
 }
 
-static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
@@ -1341,26 +1405,26 @@ static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-static void gen_CALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALL(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_push_v(s, eip_next_tl(s));
-    gen_JMP(s, env, decode);
+    gen_JMP(s, decode);
 }
 
-static void gen_CALL_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALL_m(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_push_v(s, eip_next_tl(s));
-    gen_JMP_m(s, env, decode);
+    gen_JMP_m(s, decode);
 }
 
-static void gen_CALLF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALLF(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_far_call(s);
 }
 
-static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALLF_m(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     gen_op_ld_v(s, ot, s->T0, s->A0);
     gen_add_A0_im(s, 1 << ot);
@@ -1368,41 +1432,48 @@ static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     gen_far_call(s);
 }
 
-static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CBW(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp src_ot = decode->op[0].ot - 1;
 
     tcg_gen_ext_tl(s->T0, s->T0, src_ot | MO_SIGN);
 }
 
-static void gen_CLC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags(s);
     tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
 }
 
-static void gen_CLD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLD(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, offsetof(CPUX86State, df));
 }
 
-static void gen_CLI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_reset_eflags(s, IF_MASK);
 }
 
-static void gen_CMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLTS(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_clts(tcg_env);
+    /* abort block because static cpu state changed */
+    s->base.is_jmp = DISAS_EOB_NEXT;
+}
+
+static void gen_CMC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags(s);
     tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
 }
 
-static void gen_CMOVcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMOVcc(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_cmovcc1(s, decode->b & 0xf, s->T0, s->T1);
 }
 
-static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPccXADD(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *label_top = gen_new_label();
     TCGLabel *label_bottom = gen_new_label();
@@ -1505,7 +1576,7 @@ static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     decode->cc_op = CC_OP_SUBB + ot;
 }
 
-static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -1515,7 +1586,65 @@ static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    TCGv cmpv = tcg_temp_new();
+    TCGv oldv = tcg_temp_new();
+    TCGv newv = tcg_temp_new();
+    TCGv dest;
+
+    tcg_gen_ext_tl(cmpv, cpu_regs[R_EAX], ot);
+    tcg_gen_ext_tl(newv, s->T1, ot);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
+                                  s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_ext_tl(oldv, s->T0, ot);
+        if (decode->op[0].has_ea) {
+            /*
+             * Perform an unconditional store cycle like physical cpu;
+             * must be before changing accumulator to ensure
+             * idempotency if the store faults and the instruction
+             * is restarted
+             */
+            tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
+            gen_op_st_v(s, ot, newv, s->A0);
+        } else {
+            /*
+             * Unlike the memory case, where "the destination operand receives
+             * a write cycle without regard to the result of the comparison",
+             * rm must not be touched altogether if the write fails, including
+             * not zero-extending it on 64-bit processors.  So, precompute
+             * the result of a successful writeback and perform the movcond
+             * directly on cpu_regs.  In case rm is part of RAX, note that this
+             * movcond and the one below are mutually exclusive: only one of
+             * them is executed.
+             */
+            dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, newv, newv);
+            tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
+        }
+        decode->op[0].unit = X86_OP_SKIP;
+    }
+
+    /* Write RAX only if the cmpxchg fails. */
+    dest = gen_op_deposit_reg_v(s, ot, R_EAX, s->T0, oldv);
+    tcg_gen_movcond_tl(TCG_COND_NE, dest, oldv, cmpv, s->T0, dest);
+
+    tcg_gen_mov_tl(s->cc_srcT, cmpv);
+    tcg_gen_sub_tl(cmpv, cmpv, oldv);
+    decode->cc_dst = cmpv;
+    decode->cc_src = oldv;
+    decode->cc_op = CC_OP_SUBB + ot;
+}
+
+static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_cpuid(tcg_env);
+}
+
+static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
 
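As a cross-check of the two movconds in gen_CMPXCHG, the architectural behaviour being translated is roughly the following (a sketch of the SDM semantics, not QEMU code; names are illustrative):

static uint64_t cmpxchg_model(uint64_t *accum, uint64_t dest, uint64_t src,
                              int *zf)
{
    if (*accum == dest) {
        *zf = 1;
        return src;       /* success: destination receives the source */
    }
    *zf = 0;
    *accum = dest;        /* failure: accumulator receives the destination */
    return dest;          /* memory forms still perform the write cycle */
}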
@@ -1523,7 +1652,7 @@ static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
 }
 
-static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTPI2Px(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     if (s->prefix & PREFIX_DATA) {
@@ -1533,7 +1662,7 @@ static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
     }
 }
 
-static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTPx2PI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     if (s->prefix & PREFIX_DATA) {
@@ -1543,7 +1672,7 @@ static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
     }
 }
 
-static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTTPx2PI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     if (s->prefix & PREFIX_DATA) {
@@ -1553,28 +1682,28 @@ static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_CWD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CWD(DisasContext *s, X86DecodedInsn *decode)
 {
     int shift = 8 << decode->op[0].ot;
 
     tcg_gen_sextract_tl(s->T0, s->T0, shift - 1, 1);
 }
 
-static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DAA(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_daa(tcg_env);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_DAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DAS(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_das(tcg_env);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DEC(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -1588,40 +1717,40 @@ static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update_cc_incdec(decode, s, CC_OP_DECB + ot);
 }
 
-static void gen_DIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DIV(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     switch(ot) {
     case MO_8:
-        gen_helper_divb_AL(tcg_env, s->T1);
+        gen_helper_divb_AL(tcg_env, s->T0);
         break;
     case MO_16:
-        gen_helper_divw_AX(tcg_env, s->T1);
+        gen_helper_divw_AX(tcg_env, s->T0);
         break;
     default:
    case MO_32:
-        gen_helper_divl_EAX(tcg_env, s->T1);
+        gen_helper_divl_EAX(tcg_env, s->T0);
        break;
 #ifdef TARGET_X86_64
     case MO_64:
-        gen_helper_divq_EAX(tcg_env, s->T1);
+        gen_helper_divq_EAX(tcg_env, s->T0);
         break;
 #endif
     }
 }
 
-static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EMMS(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_emms(tcg_env);
 }
 
-static void gen_ENTER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ENTER(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_enter(s, decode->op[1].imm, decode->op[2].imm);
 }
 
-static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EXTRQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
     TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);
@@ -1629,12 +1758,30 @@ static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     gen_helper_extrq_i(tcg_env, OP_PTR0, index, length);
 }
 
-static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EXTRQ_r(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
 }
 
-static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_FXRSTOR(DisasContext *s, X86DecodedInsn *decode)
+{
+    if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+        gen_NM_exception(s);
+    } else {
+        gen_helper_fxrstor(tcg_env, s->A0);
+    }
+}
+
+static void gen_FXSAVE(DisasContext *s, X86DecodedInsn *decode)
+{
+    if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+        gen_NM_exception(s);
+    } else {
+        gen_helper_fxsave(tcg_env, s->A0);
+    }
+}
+
+static void gen_HLT(DisasContext *s, X86DecodedInsn *decode)
 {
 #ifdef CONFIG_SYSTEM_ONLY
     gen_update_cc_op(s);
@@ -1644,30 +1791,30 @@ static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 #endif
 }
 
-static void gen_IDIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IDIV(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     switch(ot) {
     case MO_8:
-        gen_helper_idivb_AL(tcg_env, s->T1);
+        gen_helper_idivb_AL(tcg_env, s->T0);
         break;
     case MO_16:
-        gen_helper_idivw_AX(tcg_env, s->T1);
+        gen_helper_idivw_AX(tcg_env, s->T0);
         break;
     default:
     case MO_32:
-        gen_helper_idivl_EAX(tcg_env, s->T1);
+        gen_helper_idivl_EAX(tcg_env, s->T0);
         break;
 #ifdef TARGET_X86_64
     case MO_64:
-        gen_helper_idivq_EAX(tcg_env, s->T1);
+        gen_helper_idivq_EAX(tcg_env, s->T0);
         break;
 #endif
     }
 }
 
-static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IMUL3(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv cc_src_rhs;
@@ -1730,7 +1877,7 @@ static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update2_cc(decode, s, CC_OP_MULB + ot);
 }
 
-static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IMUL(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv cc_src_rhs;
@@ -1788,12 +1935,12 @@ static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update2_cc(decode, s, CC_OP_MULB + ot);
 }
 
-static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IN(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv_i32 port = tcg_temp_new_i32();
 
-    tcg_gen_trunc_tl_i32(port, s->T1);
+    tcg_gen_trunc_tl_i32(port, s->T0);
     tcg_gen_ext16u_i32(port, port);
     if (!gen_check_io(s, ot, port, SVM_IOIO_TYPE_MASK)) {
         return;
@@ -1804,7 +1951,7 @@ static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_bpt_io(s, port, ot);
 }
 
-static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INC(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -1818,7 +1965,7 @@ static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update_cc_incdec(decode, s, CC_OP_INCB + ot);
 }
 
-static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv_i32 port = tcg_temp_new_i32();
@@ -1838,7 +1985,7 @@ static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INSERTQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
     TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);
@@ -1846,17 +1993,17 @@ static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     gen_helper_insertq_i(tcg_env, OP_PTR0, OP_PTR1, index, length);
 }
 
-static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INSERTQ_r(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2);
 }
 
-static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_interrupt(s, decode->immediate);
 }
 
-static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT1(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_update_eip_next(s);
@@ -1864,19 +2011,19 @@ static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     s->base.is_jmp = DISAS_NORETURN;
 }
 
-static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT3(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_interrupt(s, EXCP03_INT3);
 }
 
-static void gen_INTO(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INTO(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_update_eip_cur(s);
     gen_helper_into(tcg_env, cur_insn_len_i32(s));
 }
 
-static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IRET(DisasContext *s, X86DecodedInsn *decode)
 {
     if (!PE(s) || VM86(s)) {
         gen_helper_iret_real(tcg_env, tcg_constant_i32(s->dflag - 1));
@@ -1888,13 +2035,13 @@ static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     s->base.is_jmp = DISAS_EOB_ONLY;
 }
 
-static void gen_Jcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_Jcc(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_bnd_jmp(s);
     gen_jcc(s, decode->b & 0xf, decode->immediate);
 }
 
-static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JCXZ(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
 
@@ -1903,27 +2050,27 @@ static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
 }
 
-static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMP(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_jmp_rel(s, s->dflag, decode->immediate, 0);
 }
 
-static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMP_m(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_op_jmp_v(s, s->T0);
     gen_bnd_jmp(s);
     s->base.is_jmp = DISAS_JUMP;
 }
 
-static void gen_JMPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMPF(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_far_jmp(s);
 }
 
-static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMPF_m(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     gen_op_ld_v(s, ot, s->T0, s->A0);
     gen_add_A0_im(s, 1 << ot);
@@ -1931,7 +2078,7 @@ static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
     gen_far_jmp(s);
 }
 
-static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LAHF(DisasContext *s, X86DecodedInsn *decode)
 {
     if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
         return gen_illegal_opcode(s);
@@ -1942,13 +2089,30 @@ static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8);
 }
 
-static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LAR(DisasContext *s, X86DecodedInsn *decode)
 {
-    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1);
+    MemOp ot = decode->op[0].ot;
+    TCGv result = tcg_temp_new();
+    TCGv dest;
+
+    gen_compute_eflags(s);
+    gen_update_cc_op(s);
+    gen_helper_lar(result, tcg_env, s->T0);
+
+    /* Perform writeback here to skip it if ZF=0.  */
+    decode->op[0].unit = X86_OP_SKIP;
+    dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, result, result);
+    tcg_gen_movcond_tl(TCG_COND_TSTNE, dest, cpu_cc_src, tcg_constant_tl(CC_Z),
+                       result, dest);
+}
+
+static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode)
+{
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
     gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
 }
 
-static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, int seg)
+static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -1960,39 +2124,45 @@ static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     gen_movl_seg(s, seg, s->T1);
 }
 
-static void gen_LDS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LDS(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_DS);
+    gen_lxx_seg(s, decode, R_DS);
 }
 
-static void gen_LEA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LEA(DisasContext *s, X86DecodedInsn *decode)
 {
-    tcg_gen_mov_tl(s->T0, s->A0);
+    TCGv ea = gen_lea_modrm_1(s, decode->mem, false);
+    gen_lea_v_seg_dest(s, s->aflag, s->T0, ea, -1, -1);
 }
 
-static void gen_LEAVE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LEAVE(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_leave(s);
 }
 
-static void gen_LES(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LES(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_ES);
+    gen_lxx_seg(s, decode, R_ES);
 }
 
-static void gen_LFS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LFENCE(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_FS);
+    tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
 }
 
-static void gen_LGS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LFS(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_GS);
+    gen_lxx_seg(s, decode, R_FS);
 }
 
-static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LGS(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    gen_lxx_seg(s, decode, R_GS);
+}
+
+static void gen_LODS(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
         gen_repz(s, ot, gen_lods);
     } else {
@@ -2000,7 +2170,7 @@ static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOP(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
 
@@ -2010,7 +2180,7 @@ static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
 }
 
-static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOPE(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
     TCGLabel *not_taken = gen_new_label();
@@ -2022,7 +2192,7 @@ static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
 }
 
-static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOPNE(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
     TCGLabel *not_taken = gen_new_label();
@@ -2034,18 +2204,58 @@ static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
     gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
 }
 
-static void gen_LSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LSL(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_SS);
+    MemOp ot = decode->op[0].ot;
+    TCGv result = tcg_temp_new();
+    TCGv dest;
+
+    gen_compute_eflags(s);
+    gen_update_cc_op(s);
+    gen_helper_lsl(result, tcg_env, s->T0);
+
+    /* Perform writeback here to skip it if ZF=0. */
+    decode->op[0].unit = X86_OP_SKIP;
+    dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, result, result);
+    tcg_gen_movcond_tl(TCG_COND_TSTNE, dest, cpu_cc_src, tcg_constant_tl(CC_Z),
+                       result, dest);
 }
 
-static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_lxx_seg(s, decode, R_SS);
+}
+
+static void gen_LZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* C bit (cc_src) is defined related to the input. */
+    decode->cc_src = tcg_temp_new();
+    decode->cc_dst = s->T0;
+    decode->cc_op = CC_OP_BMILGB + ot;
+    tcg_gen_mov_tl(decode->cc_src, s->T0);
+
+    /*
+     * Reduce the target_ulong result by the number of zeros that
+     * we expect to find at the top.
+     */
+    tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
+    tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - (8 << ot));
+}
+
+static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
+{
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+}
+
+static void gen_MOV(DisasContext *s, X86DecodedInsn *decode)
 {
     /* nothing to do! */
 }
 #define gen_NOP gen_MOV
 
-static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MASKMOV(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_lea_v_seg(s, cpu_regs[R_EDI], R_DS, s->override);
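gen_LZCNT above computes a narrow leading-zero count with the full-width clz and then subtracts the bits that lie above the operand; the same arithmetic in plain C (a sketch; it assumes the input was zero-extended from `width` bits, matching the translator's convention, and the function name is illustrative):

static unsigned lzcnt_model(uint64_t zero_extended, int width)
{
    unsigned clz = zero_extended ? __builtin_clzll(zero_extended) : 64;
    return clz - (64 - width);   /* a zero input yields the operand width */
}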
@@ -2056,7 +2266,7 @@ static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVBE(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -2068,7 +2278,7 @@ static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVD_from(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
 
@@ -2086,7 +2296,7 @@ static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVD_to(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     int vec_len = vector_len(s, decode);
@@ -2108,12 +2318,12 @@ static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVDQ(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_store_sse(s, decode, decode->op[2].offset);
 }
 
-static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVMSK(DisasContext *s, X86DecodedInsn *decode)
 {
     typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn;
     ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm;
@@ -2123,7 +2333,7 @@ static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode
     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
 }
 
-static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVQ(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     int lo_ofs = vector_elem_offset(&decode->op[0], MO_64, 0);
@@ -2145,14 +2355,14 @@ static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVq_dq(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     /* Otherwise the same as any other movq.  */
-    return gen_MOVQ(s, env, decode);
+    return gen_MOVQ(s, decode);
 }
 
-static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -2162,7 +2372,7 @@ static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MUL(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -2213,7 +2423,7 @@ static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     decode->cc_op = CC_OP_MULB + ot;
 }
 
-static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MULX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -2239,7 +2449,7 @@ static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_NEG(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv oldv = tcg_temp_new();
@@ -2266,7 +2476,7 @@ static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     decode->cc_op = CC_OP_SUBB + ot;
 }
 
-static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_NOT(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -2279,7 +2489,7 @@ static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -2292,7 +2502,7 @@ static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OUT(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv_i32 port = tcg_temp_new_i32();
@@ -2309,7 +2519,7 @@ static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_bpt_io(s, port, ot);
 }
 
-static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OUTS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv_i32 port = tcg_temp_new_i32();
@@ -2328,7 +2538,7 @@ static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PALIGNR(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     if (!(s->prefix & PREFIX_DATA)) {
@@ -2340,7 +2550,7 @@ static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PANDN(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2350,7 +2560,7 @@ static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
                      decode->op[1].offset, vec_len, vec_len);
 }
 
-static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PAUSE(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_update_eip_next(s);
@@ -2358,14 +2568,14 @@ static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     s->base.is_jmp = DISAS_NORETURN;
 }
 
-static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPESTRI(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPESTRM(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
@@ -2376,14 +2586,14 @@ static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPISTRI(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPISTRM(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
@@ -2394,17 +2604,17 @@ static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PDEP(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_pdep(s->T0, s->T0, s->T1);
 }
 
-static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXT(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_pext(s->T0, s->T0, s->T1);
 }
 
-static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
+static inline void gen_pextr(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
 {
     int vec_len = vector_len(s, decode);
     int mask = (vec_len >> ot) - 1;
@@ -2430,23 +2640,23 @@ static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *
     }
 }
 
-static void gen_PEXTRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTRB(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pextr(s, env, decode, MO_8);
+    gen_pextr(s, decode, MO_8);
 }
 
-static void gen_PEXTRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTRW(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pextr(s, env, decode, MO_16);
+    gen_pextr(s, decode, MO_16);
 }
 
-static void gen_PEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
-    gen_pextr(s, env, decode, ot);
+    gen_pextr(s, decode, ot);
 }
 
-static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
+static inline void gen_pinsr(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
 {
     int vec_len = vector_len(s, decode);
     int mask = (vec_len >> ot) - 1;
@@ -2477,19 +2687,19 @@ static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *
     }
 }
 
-static void gen_PINSRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSRB(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pinsr(s, env, decode, MO_8);
+    gen_pinsr(s, decode, MO_8);
 }
 
-static void gen_PINSRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSRW(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pinsr(s, env, decode, MO_16);
+    gen_pinsr(s, decode, MO_16);
 }
 
-static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSR(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pinsr(s, env, decode, decode->op[2].ot);
+    gen_pinsr(s, decode, decode->op[2].ot);
 }
 
 static void gen_pmovmskb_i64(TCGv_i64 d, TCGv_i64 s)
@@ -2529,7 +2739,7 @@ static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s)
     tcg_gen_or_vec(vece, d, d, t);
 }
 
-static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode)
 {
     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
     static const GVecGen2 g = {
@@ -2573,7 +2783,7 @@ static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
     }
 }
 
-static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POP(DisasContext *s, X86DecodedInsn *decode)
 {
     X86DecodedOp *op = &decode->op[0];
     MemOp ot = gen_pop_T0(s);
@@ -2587,12 +2797,21 @@ static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_pop_update(s, ot);
 }
 
-static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POPA(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_popa(s);
 }
 
-static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POPCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+    decode->cc_src = tcg_temp_new();
+    decode->cc_op = CC_OP_POPCNT;
+
+    tcg_gen_mov_tl(decode->cc_src, s->T0);
+    tcg_gen_ctpop_tl(s->T0, s->T0);
+}
+
+static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot;
     int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
@@ -2614,13 +2833,13 @@ static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     s->base.is_jmp = DISAS_EOB_NEXT;
 }
 
-static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSHUFW(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pshufw_mmx(OP_PTR0, OP_PTR1, imm);
 }
 
-static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLW_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2633,7 +2852,7 @@ static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLW_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2646,7 +2865,7 @@ static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRAW_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2658,7 +2877,7 @@ static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
                           decode->immediate, vec_len, vec_len);
 }
 
-static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLD_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2671,7 +2890,7 @@ static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLD_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2684,7 +2903,7 @@ static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRAD_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2696,7 +2915,7 @@ static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
                           decode->immediate, vec_len, vec_len);
 }
 
-static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2709,7 +2928,7 @@ static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_PSLLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2736,7 +2955,7 @@ static TCGv_ptr make_imm8u_xmm_vec(uint8_t imm, int vec_len)
     return ptr;
 }
 
-static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLDQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);
@@ -2748,7 +2967,7 @@ static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
     }
 }
 
-static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLDQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);
@@ -2760,17 +2979,17 @@ static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco
     }
 }
 
-static void gen_PUSH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSH(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_push_v(s, s->T1);
+    gen_push_v(s, s->T0);
 }
 
-static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSHA(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_pusha(s);
 }
 
-static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSHF(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_read_eflags(s->T0, tcg_env);
@@ -2778,16 +2997,16 @@ static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
 }
 
 static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
-                             bool *can_be_zero, TCGv *count)
+                             bool *can_be_zero, TCGv *count, int unit)
 {
     MemOp ot = decode->op[0].ot;
     int mask = (ot <= MO_32 ? 0x1f : 0x3f);
 
     *can_be_zero = false;
-    switch (decode->op[2].unit) {
+    switch (unit) {
     case X86_OP_INT:
         *count = tcg_temp_new();
-        tcg_gen_andi_tl(*count, s->T1, mask);
+        tcg_gen_andi_tl(*count, cpu_regs[R_ECX], mask);
         *can_be_zero = true;
         break;
 
@@ -2967,12 +3186,12 @@ static void gen_rotc_mod(MemOp ot, TCGv count)
  * length - count, because (length-1) - (count-1) can be computed with
 * a XOR, and that is commutative unlike subtraction.
 */
-static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
 {
     bool have_1bit_cin, can_be_zero;
     TCGv count;
     TCGLabel *zero_label = NULL;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
     TCGv low, high, low_count;
 
     if (!count) {
@@ -3019,12 +3238,12 @@ static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool have_1bit_cin, can_be_zero;
     TCGv count;
     TCGLabel *zero_label = NULL;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
    TCGv low, high, high_count;
 
     if (!count) {
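The XOR identity cited in the comment above gen_RCL holds because the rotate length is a power of two: length-1 is then an all-ones mask, so the subtraction (length-1) - (count-1) can never borrow and degenerates to XOR, which commutes. A one-function check of the claim (illustrative name, not from the patch):

static unsigned rot_complement(unsigned count, unsigned length)
{
    /* equals length - count for 1 <= count <= length,
     * provided length is a power of two */
    return (length - 1) ^ (count - 1);
}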
@@ -3072,9 +3291,53 @@ static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+#ifdef CONFIG_USER_ONLY
+static void gen_unreachable(DisasContext *s, X86DecodedInsn *decode)
 {
-    int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+    g_assert_not_reached();
+}
+#endif
+
+#ifndef CONFIG_USER_ONLY
+static void gen_RDMSR(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_rdmsr(tcg_env);
+}
+#else
+#define gen_RDMSR gen_unreachable
+#endif
+
+static void gen_RDPMC(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    translator_io_start(&s->base);
+    gen_helper_rdpmc(tcg_env);
+    s->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_RDTSC(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    translator_io_start(&s->base);
+    gen_helper_rdtsc(tcg_env);
+}
+
+static void gen_RDxxBASE(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS];
+
+    /* Preserve hflags bits by testing CR4 at runtime.  */
+    gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK));
+    tcg_gen_mov_tl(s->T0, base);
+}
+
+static void gen_RET(DisasContext *s, X86DecodedInsn *decode)
+{
+    int16_t adjust = decode->e.op1 == X86_TYPE_I ? decode->immediate : 0;
 
     MemOp ot = gen_pop_T0(s);
     gen_stack_update(s, adjust + (1 << ot));
@@ -3083,9 +3346,9 @@ static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     s->base.is_jmp = DISAS_JUMP;
 }
 
-static void gen_RETF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RETF(DisasContext *s, X86DecodedInsn *decode)
 {
-    int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+    int16_t adjust = decode->e.op1 == X86_TYPE_I ? decode->immediate : 0;
 
     if (!PE(s) || VM86(s)) {
         gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], 0);
@@ -3154,11 +3417,11 @@ static void gen_rot_carry(X86DecodedInsn *decode, TCGv result,
     }
 }
 
-static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ROL(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
     TCGv_i32 temp32, count32;
     TCGv old = tcg_temp_new();
 
@@ -3182,11 +3445,11 @@ static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_rot_overflow(decode, s->T0, old, can_be_zero, count);
 }
 
-static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ROR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
     TCGv_i32 temp32, count32;
     TCGv old = tcg_temp_new();
 
@@ -3211,7 +3474,7 @@ static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_rot_overflow(decode, s->T0, old, can_be_zero, count);
 }
 
-static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RORX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask = ot == MO_64 ? 63 : 31;
@@ -3235,7 +3498,18 @@ static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+#ifndef CONFIG_USER_ONLY
+static void gen_RSM(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_rsm(tcg_env);
+    assume_cc_op(s, CC_OP_EFLAGS);
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+#else
+#define gen_RSM gen_UD
+#endif
+
+static void gen_SAHF(DisasContext *s, X86DecodedInsn *decode)
 {
     if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
         return gen_illegal_opcode(s);
@@ -3247,7 +3521,7 @@ static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
 }
 
-static void gen_SALC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SALC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags_c(s, s->T0);
     tcg_gen_neg_tl(s->T0, s->T0);
@@ -3283,11 +3557,11 @@ static void gen_shift_dynamic_flags(DisasContext *s, X86DecodedInsn *decode, TCG
                                old_cc_op, tcg_constant_i32(cc_op));
 }
 
-static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SAR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
 
     if (!count) {
         return;
@@ -3305,7 +3579,7 @@ static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SARX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask;
@@ -3315,7 +3589,7 @@ static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
 }
 
-static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SBB(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv c_in = tcg_temp_new();
@@ -3337,7 +3611,7 @@ static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update3_cc(decode, s, CC_OP_SBBB + ot, c_in);
 }
 
-static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SCAS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -3347,27 +3621,32 @@ static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_SETcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SETcc(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_setcc1(s, decode->b & 0xf, s->T0);
 }
 
-static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SFENCE(DisasContext *s, X86DecodedInsn *decode)
+{
+    tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
+}
+
+static void gen_SHA1NEXTE(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA1MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1MSG1(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha1msg1(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA1MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1MSG2(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha1msg2(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1RNDS4(DisasContext *s, X86DecodedInsn *decode)
 {
     switch(decode->immediate & 3) {
     case 0:
@@ -3385,17 +3664,17 @@ static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_SHA256MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256MSG1(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha256msg1(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA256MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256MSG2(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha256msg2(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256RNDS2(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 wk0 = tcg_temp_new_i32();
     TCGv_i32 wk1 = tcg_temp_new_i32();
@@ -3406,11 +3685,11 @@ static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *d
     gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1);
 }
 
-static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHL(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
 
     if (!count) {
         return;
@@ -3428,7 +3707,28 @@ static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = s->tmp0;
+    gen_shiftd_rm_T1(s, ot, false, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot);
+    } else {
+        decode->cc_op = CC_OP_SHLB + ot;
+    }
+}
+
+static void gen_SHLX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask;
@@ -3438,11 +3738,11 @@ static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
 }
 
-static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
 
     if (!count) {
         return;
@@ -3460,7 +3760,28 @@ static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = s->tmp0;
+    gen_shiftd_rm_T1(s, ot, true, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+    } else {
+        decode->cc_op = CC_OP_SARB + ot;
+    }
+}
+
+static void gen_SHRX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask;
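The data operation behind the new gen_SHLD/gen_SHRD is the classic double shift: the destination shifts by count while the vacated bits are filled from the second operand. A sketch of the SHLD case for 0 < count < width (names illustrative; result masking to the operand width is left to the caller, as in the translator):

static uint64_t shld_model(uint64_t dst, uint64_t src, unsigned count,
                           unsigned width)
{
    /* vacated low bits of dst are filled from the top bits of src */
    return (dst << count) | (src >> (width - count));
}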
@@ -3470,37 +3791,37 @@ static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
 }
 
-static void gen_STC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags(s);
     tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
 }
 
-static void gen_STD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STD(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_st_i32(tcg_constant_i32(-1), tcg_env, offsetof(CPUX86State, df));
 }
 
-static void gen_STI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_set_eflags(s, IF_MASK);
     s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
 }
 
-static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VAESKEYGEN(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     assert(!s->vex_l);
     gen_helper_aeskeygenassist_xmm(tcg_env, OP_PTR0, OP_PTR1, imm);
 }
 
-static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STMXCSR(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_update_mxcsr(tcg_env);
     tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
 }
 
-static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STOS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -3510,7 +3831,7 @@ static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     }
 }
 
-static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SUB(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -3526,12 +3847,71 @@ static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     prepare_update2_cc(decode, s, CC_OP_SUBB + ot);
 }
 
-static void gen_UD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SYSCALL(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_syscall(tcg_env, cur_insn_len_i32(s));
+    if (LMA(s)) {
+        assume_cc_op(s, CC_OP_EFLAGS);
+    }
+
+    /*
+     * TF handling for the syscall insn is different. The TF bit is checked
+     * after the syscall insn completes. This allows #DB to not be
+     * generated after one has entered CPL0 if TF is set in FMASK.
+     */
+    s->base.is_jmp = DISAS_EOB_RECHECK_TF;
+}
+
+static void gen_SYSENTER(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_sysenter(tcg_env);
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+static void gen_SYSEXIT(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_sysexit(tcg_env, tcg_constant_i32(s->dflag - 1));
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+static void gen_SYSRET(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_sysret(tcg_env, tcg_constant_i32(s->dflag - 1));
+    if (LMA(s)) {
+        assume_cc_op(s, CC_OP_EFLAGS);
+    }
+
+    /*
+     * TF handling for the sysret insn is different. The TF bit is checked
+     * after the sysret insn completes. This allows #DB to be
+     * generated "as if" the syscall insn in userspace has just
+     * completed.
+     */
+    s->base.is_jmp = DISAS_EOB_RECHECK_TF;
+}
+
+static void gen_TZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* C bit (cc_src) is defined related to the input. */
+    decode->cc_src = tcg_temp_new();
+    decode->cc_dst = s->T0;
+    decode->cc_op = CC_OP_BMILGB + ot;
+    tcg_gen_mov_tl(decode->cc_src, s->T0);
+
+    /* A zero input returns the operand size. */
+    tcg_gen_ctzi_tl(s->T0, s->T0, 8 << ot);
+}
+
+static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_illegal_opcode(s);
 }
 
-static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VAESIMC(DisasContext *s, X86DecodedInsn *decode)
 {
     assert(!s->vex_l);
     gen_helper_aesimc_xmm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3586,7 +3966,7 @@ static const SSEFunc_0_eppp gen_helper_cmp_funcs[32][6] = {
 };
 #undef SSE_CMP
 
-static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCMP(DisasContext *s, X86DecodedInsn *decode)
 {
     int index = decode->immediate & (s->prefix & PREFIX_VEX ? 31 : 7);
     int b =
@@ -3597,7 +3977,7 @@ static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     gen_helper_cmp_funcs[index][b](tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCOMI(DisasContext *s, X86DecodedInsn *decode)
 {
     SSEFunc_0_epp fn;
     fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss;
@@ -3605,7 +3985,7 @@ static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPD2PS(DisasContext *s, X86DecodedInsn *decode)
 {
     if (s->vex_l) {
         gen_helper_cvtpd2ps_ymm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3614,7 +3994,7 @@ static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPS2PD(DisasContext *s, X86DecodedInsn *decode)
 {
     if (s->vex_l) {
         gen_helper_cvtps2pd_ymm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3623,9 +4003,9 @@ static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPS2PH(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_unary_imm_fp_sse(s, env, decode,
+    gen_unary_imm_fp_sse(s, decode,
                          gen_helper_cvtps2ph_xmm,
                          gen_helper_cvtps2ph_ymm);
     /*
@@ -3637,17 +4017,17 @@ static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSD2SS(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_cvtsd2ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSS2SD(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_cvtss2sd(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSI2Sx(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     TCGv_i32 in;
@@ -3677,7 +4057,7 @@ static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_VCVTtSx2SI(DisasContext *s, X86DecodedInsn *decode,
                                   SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq,
                                   SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq)
 {
@@ -3715,21 +4095,21 @@ static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedI
 #define gen_helper_cvttsd2sq NULL
 #endif
 
-static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSx2SI(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_VCVTtSx2SI(s, env, decode,
+    gen_VCVTtSx2SI(s, decode,
                    gen_helper_cvtss2si, gen_helper_cvtss2sq,
                    gen_helper_cvtsd2si, gen_helper_cvtsd2sq);
 }
 
-static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTTSx2SI(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_VCVTtSx2SI(s, env, decode,
+    gen_VCVTtSx2SI(s, decode,
                    gen_helper_cvttss2si, gen_helper_cvttss2sq,
                    gen_helper_cvttsd2si, gen_helper_cvttsd2sq);
 }
 
-static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VEXTRACTx128(DisasContext *s, X86DecodedInsn *decode)
 {
     int mask = decode->immediate & 1;
     int src_ofs = vector_elem_offset(&decode->op[1], MO_128, mask);
@@ -3741,12 +4121,12 @@ static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *
     }
 }
 
-static void gen_VEXTRACTPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VEXTRACTPS(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pextr(s, env, decode, MO_32);
+    gen_pextr(s, decode, MO_32);
 }
 
-static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode)
 {
     int val = decode->immediate;
     int dest_word = (val >> 4) & 3;
@@ -3779,21 +4159,21 @@ static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec
     }
 }
 
-static void gen_VINSERTPS_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTPS_r(DisasContext *s, X86DecodedInsn *decode)
 {
     int val = decode->immediate;
     tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
                    vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3));
-    gen_vinsertps(s, env, decode);
+    gen_vinsertps(s, decode);
 }
 
-static void gen_VINSERTPS_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTPS_m(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
-    gen_vinsertps(s, env, decode);
+    gen_vinsertps(s, decode);
 }
 
-static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTx128(DisasContext *s, X86DecodedInsn *decode)
 {
     int mask = decode->immediate & 1;
     tcg_gen_gvec_mov(MO_64,
@@ -3804,7 +4184,7 @@ static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *d
                      decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask)), 16, 16);
 }
 
-static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_maskmov(DisasContext *s, X86DecodedInsn *decode,
                                SSEFunc_0_eppt xmm, SSEFunc_0_eppt ymm)
 {
     if (!s->vex_l) {
@@ -3814,17 +4194,17 @@ static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn
     }
 }
 
-static void gen_VMASKMOVPD_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMASKMOVPD_st(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_maskmov(s, env, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm);
+    gen_maskmov(s, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm);
 }
 
-static void gen_VMASKMOVPS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMASKMOVPS_st(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm);
+    gen_maskmov(s, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm);
 }
 
-static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx_ld(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
     if (decode->op[0].offset != decode->op[1].offset) {
@@ -3833,12 +4213,12 @@ static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *de
     }
 }
 
-static void gen_VMOVHPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx_st(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_stq_env_A0(s, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
 }
 
-static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx(DisasContext *s, X86DecodedInsn *decode)
 {
     if (decode->op[0].offset != decode->op[2].offset) {
         tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
@@ -3850,7 +4230,7 @@ static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod
     }
 }
 
-static void gen_VMOVHLPS(DisasContext *s,
X86DecodedInsn *decode) { tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); @@ -3860,7 +4240,7 @@ static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco } } -static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVLHPS(DisasContext *s, X86DecodedInsn *decode) { tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset); tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); @@ -3875,7 +4255,7 @@ static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco * Use a gvec move to move everything above the bottom 64 bits. */ -static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVLPx(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); @@ -3884,7 +4264,7 @@ static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decod tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); } -static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVLPx_ld(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); @@ -3893,13 +4273,13 @@ static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *de tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); } -static void gen_VMOVLPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVLPx_st(DisasContext *s, X86DecodedInsn *decode) { tcg_gen_ld_i64(s->tmp1_i64, OP_PTR2, offsetof(ZMMReg, ZMM_Q(0))); tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); } -static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVSD_ld(DisasContext *s, X86DecodedInsn *decode) { TCGv_i64 zero = tcg_constant_i64(0); @@ -3908,7 +4288,7 @@ static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); } -static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVSS(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); @@ -3917,7 +4297,7 @@ static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } -static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVSS_ld(DisasContext *s, X86DecodedInsn *decode) { int vec_len = vector_len(s, decode); @@ -3926,55 +4306,55 @@ static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *dec tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); } -static void gen_VMOVSS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VMOVSS_st(DisasContext *s, X86DecodedInsn *decode) { tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); } -static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VPMASKMOV_st(DisasContext *s, X86DecodedInsn *decode) { if (s->vex_w) { - gen_VMASKMOVPD_st(s, env, decode); + gen_VMASKMOVPD_st(s, decode); } else { - gen_VMASKMOVPS_st(s, env, decode); + gen_VMASKMOVPS_st(s, decode); } } -static void 
gen_VPERMD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VPERMD(DisasContext *s, X86DecodedInsn *decode) { assert(s->vex_l); gen_helper_vpermd_ymm(OP_PTR0, OP_PTR1, OP_PTR2); } -static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VPERM2x128(DisasContext *s, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); assert(s->vex_l); gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm); } -static void gen_VPHMINPOSUW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VPHMINPOSUW(DisasContext *s, X86DecodedInsn *decode) { assert(!s->vex_l); gen_helper_phminposuw_xmm(tcg_env, OP_PTR0, OP_PTR2); } -static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VROUNDSD(DisasContext *s, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); assert(!s->vex_l); gen_helper_roundsd_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); } -static void gen_VROUNDSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VROUNDSS(DisasContext *s, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); assert(!s->vex_l); gen_helper_roundss_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); } -static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VSHUF(DisasContext *s, X86DecodedInsn *decode) { TCGv_i32 imm = tcg_constant_i32(decode->immediate); SSEFunc_0_pppi ps, pd, fn; @@ -3984,7 +4364,7 @@ static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) fn(OP_PTR0, OP_PTR1, OP_PTR2, imm); } -static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VUCOMI(DisasContext *s, X86DecodedInsn *decode) { SSEFunc_0_epp fn; fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss; @@ -3992,7 +4372,7 @@ static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode assume_cc_op(s, CC_OP_EFLAGS); } -static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VZEROALL(DisasContext *s, X86DecodedInsn *decode) { TCGv_ptr ptr = tcg_temp_new_ptr(); @@ -4001,7 +4381,7 @@ static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *deco tcg_constant_ptr(CPU_NB_REGS * sizeof(ZMMReg))); } -static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_VZEROUPPER(DisasContext *s, X86DecodedInsn *decode) { int i; @@ -4011,7 +4391,7 @@ static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *de } } -static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_WAIT(DisasContext *s, X86DecodedInsn *decode) { if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) { gen_NM_exception(s); @@ -4022,7 +4402,52 @@ static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) } } -static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +#ifndef CONFIG_USER_ONLY +static void gen_WRMSR(DisasContext *s, X86DecodedInsn *decode) +{ + gen_update_cc_op(s); + gen_update_eip_cur(s); + gen_helper_wrmsr(tcg_env); + s->base.is_jmp = DISAS_EOB_NEXT; +} +#else +#define gen_WRMSR gen_unreachable +#endif + +static void gen_WRxxBASE(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS]; + + /* Preserve hflags bits by testing CR4 at runtime. 
*/ + gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK)); + tcg_gen_mov_tl(base, s->T0); +} + +static void gen_XADD(DisasContext *s, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[1].ot; + + decode->cc_dst = tcg_temp_new(); + decode->cc_src = s->T1; + decode->cc_op = CC_OP_ADDB + ot; + + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_fetch_add_tl(s->T0, s->A0, s->T1, s->mem_index, ot | MO_LE); + tcg_gen_add_tl(decode->cc_dst, s->T0, s->T1); + } else { + tcg_gen_add_tl(decode->cc_dst, s->T0, s->T1); + /* + * NOTE: writing memory first is important for MMU exceptions, + * but "new result" wins for XADD AX, AX. + */ + gen_writeback(s, decode, 0, decode->cc_dst); + } + if (decode->op[0].has_ea || decode->op[2].n != decode->op[0].n) { + gen_writeback(s, decode, 2, s->T0); + } +} + +static void gen_XCHG(DisasContext *s, X86DecodedInsn *decode) { if (s->prefix & PREFIX_LOCK) { tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1, @@ -4036,7 +4461,7 @@ static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) } } -static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_XLAT(DisasContext *s, X86DecodedInsn *decode) { /* AL is already zero-extended into s->T0. */ tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0); @@ -4044,7 +4469,7 @@ static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) gen_op_ld_v(s, MO_8, s->T0, s->A0); } -static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +static void gen_XOR(DisasContext *s, X86DecodedInsn *decode) { /* special case XOR reg, reg */ if (decode->op[1].unit == X86_OP_INT && @@ -4064,3 +4489,34 @@ static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); } } + +static void gen_XRSTOR(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv_i64 features = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]); + gen_helper_xrstor(tcg_env, s->A0, features); + if (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_MPX) { + /* + * XRSTOR is how MPX is enabled, which changes how + * we translate. Thus we need to end the TB. 
+ */ + s->base.is_jmp = DISAS_EOB_NEXT; + } +} + +static void gen_XSAVE(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv_i64 features = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]); + gen_helper_xsave(tcg_env, s->A0, features); +} + +static void gen_XSAVEOPT(DisasContext *s, X86DecodedInsn *decode) +{ + TCGv_i64 features = tcg_temp_new_i64(); + + tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]); + gen_helper_xsave(tcg_env, s->A0, features); +} diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c index 715db1f232..aee3d19f29 100644 --- a/target/i386/tcg/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -2265,11 +2265,11 @@ void helper_sysexit(CPUX86State *env, int dflag) target_ulong helper_lsl(CPUX86State *env, target_ulong selector1) { unsigned int limit; - uint32_t e1, e2, eflags, selector; + uint32_t e1, e2, selector; int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env); + assert(CC_OP == CC_OP_EFLAGS); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2301,22 +2301,22 @@ target_ulong helper_lsl(CPUX86State *env, target_ulong selector1) } if (dpl < cpl || dpl < rpl) { fail: - CC_SRC = eflags & ~CC_Z; + CC_SRC &= ~CC_Z; return 0; } } limit = get_seg_limit(e1, e2); - CC_SRC = eflags | CC_Z; + CC_SRC |= CC_Z; return limit; } target_ulong helper_lar(CPUX86State *env, target_ulong selector1) { - uint32_t e1, e2, eflags, selector; + uint32_t e1, e2, selector; int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env); + assert(CC_OP == CC_OP_EFLAGS); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2351,11 +2351,11 @@ target_ulong helper_lar(CPUX86State *env, target_ulong selector1) } if (dpl < cpl || dpl < rpl) { fail: - CC_SRC = eflags & ~CC_Z; + CC_SRC &= ~CC_Z; return 0; } } - CC_SRC = eflags | CC_Z; + CC_SRC |= CC_Z; return e2 & 0x00f0ff00; } diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 7fa0c5a06d..094aa56a20 100644 --- a/target/i386/tcg/sysemu/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -63,23 +63,13 @@ target_ulong helper_inl(CPUX86State *env, uint32_t port) cpu_get_mem_attrs(env), NULL); } -target_ulong helper_read_crN(CPUX86State *env, int reg) +target_ulong helper_read_cr8(CPUX86State *env) { - target_ulong val; - - switch (reg) { - default: - val = env->cr[reg]; - break; - case 8: - if (!(env->hflags2 & HF2_VINTR_MASK)) { - val = cpu_get_apic_tpr(env_archcpu(env)->apic_state); - } else { - val = env->int_ctl & V_TPR_MASK; - } - break; + if (!(env->hflags2 & HF2_VINTR_MASK)) { + return cpu_get_apic_tpr(env_archcpu(env)->apic_state); + } else { + return env->int_ctl & V_TPR_MASK; } - return val; } void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index fcba9c155f..ad1819815a 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -246,10 +246,6 @@ STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs) STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val) STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val) STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val) -STUB_HELPER(rdmsr, TCGv_env env) -STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg) -STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg) -STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val) STUB_HELPER(stgi, TCGv_env env) STUB_HELPER(svm_check_intercept, TCGv_env 
env, TCGv_i32 type) STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag) @@ -257,7 +253,6 @@ STUB_HELPER(vmmcall, TCGv_env env) STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs) STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag) STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val) -STUB_HELPER(wrmsr, TCGv_env env) #endif static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num); @@ -439,13 +434,6 @@ static inline MemOp mo_stacksize(DisasContext *s) return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16; } -/* Select size 8 if lsb of B is clear, else OT. Used for decoding - byte vs word opcodes. */ -static inline MemOp mo_b_d(int b, MemOp ot) -{ - return b & 1 ? ot : MO_8; -} - /* Compute the result of writing t0 to the OT-sized register REG. * * If DEST is NULL, store the result into the register and return the @@ -540,15 +528,6 @@ static inline void gen_op_st_v(DisasContext *s, int idx, TCGv t0, TCGv a0) tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE); } -static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d) -{ - if (d == OR_TMP0) { - gen_op_st_v(s, idx, s->T0, s->A0); - } else { - gen_op_mov_reg_v(s, idx, d, s->T0); - } -} - static void gen_update_eip_next(DisasContext *s) { assert(s->pc_save != -1); @@ -729,11 +708,6 @@ static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign) return dst; } -static void gen_extu(MemOp ot, TCGv reg) -{ - gen_ext_tl(reg, reg, ot, false); -} - static void gen_exts(MemOp ot, TCGv reg) { gen_ext_tl(reg, reg, ot, true); @@ -837,17 +811,6 @@ static void gen_movs(DisasContext *s, MemOp ot) gen_op_add_reg(s, s->aflag, R_EDI, dshift); } -static void gen_op_update1_cc(DisasContext *s) -{ - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -} - -static void gen_op_update2_cc(DisasContext *s) -{ - tcg_gen_mov_tl(cpu_cc_src, s->T1); - tcg_gen_mov_tl(cpu_cc_dst, s->T0); -} - /* compute all eflags to reg */ static void gen_mov_eflags(DisasContext *s, TCGv reg) { @@ -1448,64 +1411,11 @@ static bool check_cpl0(DisasContext *s) return false; } -static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result, - TCGv shm1, TCGv count, bool is_right) -{ - TCGv_i32 z32, s32, oldop; - TCGv z_tl; - - /* Store the results into the CC variables. If we know that the - variable must be dead, store unconditionally. Otherwise we'll - need to not disrupt the current contents. */ - z_tl = tcg_constant_tl(0); - if (cc_op_live[s->cc_op] & USES_CC_DST) { - tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl, - result, cpu_cc_dst); - } else { - tcg_gen_mov_tl(cpu_cc_dst, result); - } - if (cc_op_live[s->cc_op] & USES_CC_SRC) { - tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl, - shm1, cpu_cc_src); - } else { - tcg_gen_mov_tl(cpu_cc_src, shm1); - } - - /* Get the two potential CC_OP values into temporaries. */ - tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); - if (s->cc_op == CC_OP_DYNAMIC) { - oldop = cpu_cc_op; - } else { - tcg_gen_movi_i32(s->tmp3_i32, s->cc_op); - oldop = s->tmp3_i32; - } - - /* Conditionally store the CC_OP value. */ - z32 = tcg_constant_i32(0); - s32 = tcg_temp_new_i32(); - tcg_gen_trunc_tl_i32(s32, count); - tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop); - - /* The CC_OP value is no longer predictable. 
*/ - set_cc_op(s, CC_OP_DYNAMIC); -} - /* XXX: add faster immediate case */ -static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1, - bool is_right, TCGv count_in) +static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, + bool is_right, TCGv count) { target_ulong mask = (ot == MO_64 ? 63 : 31); - TCGv count; - - /* load */ - if (op1 == OR_TMP0) { - gen_op_ld_v(s, ot, s->T0, s->A0); - } else { - gen_op_mov_v_reg(s, ot, s->T0, op1); - } - - count = tcg_temp_new(); - tcg_gen_andi_tl(count, count_in, mask); switch (ot) { case MO_16: @@ -1567,11 +1477,6 @@ static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1, tcg_gen_or_tl(s->T0, s->T0, s->T1); break; } - - /* store */ - gen_op_st_rm_T0_A0(s, ot, op1); - - gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right); } #define X86_MAX_INSN_LENGTH 15 @@ -3081,108 +2986,11 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) CPUX86State *env = cpu_env(cpu); int prefixes = s->prefix; MemOp dflag = s->dflag; - int shift; MemOp ot; - int modrm, reg, rm, mod, op, opreg, val; + int modrm, reg, rm, mod, op, val; /* now check op code */ switch (b) { - /**************************/ - /* arith & logic */ - case 0x1c0: - case 0x1c1: /* xadd Ev, Gv */ - ot = mo_b_d(b, dflag); - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - mod = (modrm >> 6) & 3; - gen_op_mov_v_reg(s, ot, s->T0, reg); - if (mod == 3) { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(s, ot, s->T1, rm); - tcg_gen_add_tl(s->T0, s->T0, s->T1); - gen_op_mov_reg_v(s, ot, reg, s->T1); - gen_op_mov_reg_v(s, ot, rm, s->T0); - } else { - gen_lea_modrm(env, s, modrm); - if (s->prefix & PREFIX_LOCK) { - tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0, - s->mem_index, ot | MO_LE); - tcg_gen_add_tl(s->T0, s->T0, s->T1); - } else { - gen_op_ld_v(s, ot, s->T1, s->A0); - tcg_gen_add_tl(s->T0, s->T0, s->T1); - gen_op_st_v(s, ot, s->T0, s->A0); - } - gen_op_mov_reg_v(s, ot, reg, s->T1); - } - gen_op_update2_cc(s); - set_cc_op(s, CC_OP_ADDB + ot); - break; - case 0x1b0: - case 0x1b1: /* cmpxchg Ev, Gv */ - { - TCGv oldv, newv, cmpv, dest; - - ot = mo_b_d(b, dflag); - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - mod = (modrm >> 6) & 3; - oldv = tcg_temp_new(); - newv = tcg_temp_new(); - cmpv = tcg_temp_new(); - gen_op_mov_v_reg(s, ot, newv, reg); - tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]); - gen_extu(ot, cmpv); - if (s->prefix & PREFIX_LOCK) { - if (mod == 3) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv, - s->mem_index, ot | MO_LE); - } else { - if (mod == 3) { - rm = (modrm & 7) | REX_B(s); - gen_op_mov_v_reg(s, ot, oldv, rm); - gen_extu(ot, oldv); - - /* - * Unlike the memory case, where "the destination operand receives - * a write cycle without regard to the result of the comparison", - * rm must not be touched altogether if the write fails, including - * not zero-extending it on 64-bit processors. So, precompute - * the result of a successful writeback and perform the movcond - * directly on cpu_regs. Also need to write accumulator first, in - * case rm is part of RAX too. 
- */ - dest = gen_op_deposit_reg_v(s, ot, rm, newv, newv); - tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest); - } else { - gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, ot, oldv, s->A0); - - /* - * Perform an unconditional store cycle like physical cpu; - * must be before changing accumulator to ensure - * idempotency if the store faults and the instruction - * is restarted - */ - tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); - gen_op_st_v(s, ot, newv, s->A0); - } - } - /* - * Write EAX only if the cmpxchg fails; reuse newv as the destination, - * since it's dead here. - */ - dest = gen_op_deposit_reg_v(s, ot, R_EAX, newv, oldv); - tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, dest, newv); - tcg_gen_mov_tl(cpu_cc_src, oldv); - tcg_gen_mov_tl(s->cc_srcT, cmpv); - tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv); - set_cc_op(s, CC_OP_SUBB + ot); - } - break; case 0x1c7: /* cmpxchg8b */ modrm = x86_ldub_code(env, s); mod = (modrm >> 6) & 3; @@ -3245,45 +3053,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } break; - /**************************/ - /* shifts */ - case 0x1a4: /* shld imm */ - op = 0; - shift = 1; - goto do_shiftd; - case 0x1a5: /* shld cl */ - op = 0; - shift = 0; - goto do_shiftd; - case 0x1ac: /* shrd imm */ - op = 1; - shift = 1; - goto do_shiftd; - case 0x1ad: /* shrd cl */ - op = 1; - shift = 0; - do_shiftd: - ot = dflag; - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - rm = (modrm & 7) | REX_B(s); - reg = ((modrm >> 3) & 7) | REX_R(s); - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - opreg = OR_TMP0; - } else { - opreg = rm; - } - gen_op_mov_v_reg(s, ot, s->T1, reg); - - if (shift) { - TCGv imm = tcg_constant_tl(x86_ldub_code(env, s)); - gen_shiftd_rm_T1(s, ot, opreg, op, imm); - } else { - gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]); - } - break; - /************************/ /* bit operations */ case 0x1ba: /* bt/bts/btr/btc Gv, im */ @@ -3423,147 +3192,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) break; } break; - case 0x1bc: /* bsf / tzcnt */ - case 0x1bd: /* bsr / lzcnt */ - ot = dflag; - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ld_modrm(env, s, modrm, ot); - gen_extu(ot, s->T0); - - /* Note that lzcnt and tzcnt are in different extensions. */ - if ((prefixes & PREFIX_REPZ) - && (b & 1 - ? s->cpuid_ext3_features & CPUID_EXT3_ABM - : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) { - int size = 8 << ot; - /* For lzcnt/tzcnt, C bit is defined related to the input. */ - tcg_gen_mov_tl(cpu_cc_src, s->T0); - if (b & 1) { - /* For lzcnt, reduce the target_ulong result by the - number of zeros that we expect to find at the top. */ - tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS); - tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size); - } else { - /* For tzcnt, a zero input must return the operand size. */ - tcg_gen_ctzi_tl(s->T0, s->T0, size); - } - /* For lzcnt/tzcnt, Z bit is defined related to the result. */ - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_BMILGB + ot); - } else { - /* For bsr/bsf, only the Z bit is defined and it is related - to the input and not the result. */ - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - set_cc_op(s, CC_OP_LOGICB + ot); - - /* ??? The manual says that the output is undefined when the - input is zero, but real hardware leaves it unchanged, and - real programs appear to depend on that. Accomplish this - by passing the output as the value to return upon zero. 
*/ - if (b & 1) { - /* For bsr, return the bit index of the first 1 bit, - not the count of leading zeros. */ - tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1); - tcg_gen_clz_tl(s->T0, s->T0, s->T1); - tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1); - } else { - tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]); - } - } - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - case 0x130: /* wrmsr */ - case 0x132: /* rdmsr */ - if (check_cpl0(s)) { - gen_update_cc_op(s); - gen_update_eip_cur(s); - if (b & 2) { - gen_helper_rdmsr(tcg_env); - } else { - gen_helper_wrmsr(tcg_env); - s->base.is_jmp = DISAS_EOB_NEXT; - } - } - break; - case 0x131: /* rdtsc */ - gen_update_cc_op(s); - gen_update_eip_cur(s); - translator_io_start(&s->base); - gen_helper_rdtsc(tcg_env); - break; - case 0x133: /* rdpmc */ - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_rdpmc(tcg_env); - s->base.is_jmp = DISAS_NORETURN; - break; - case 0x134: /* sysenter */ - /* For AMD SYSENTER is not valid in long mode */ - if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { - goto illegal_op; - } - if (!PE(s)) { - gen_exception_gpf(s); - } else { - gen_helper_sysenter(tcg_env); - s->base.is_jmp = DISAS_EOB_ONLY; - } - break; - case 0x135: /* sysexit */ - /* For AMD SYSEXIT is not valid in long mode */ - if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { - goto illegal_op; - } - if (!PE(s) || CPL(s) != 0) { - gen_exception_gpf(s); - } else { - gen_helper_sysexit(tcg_env, tcg_constant_i32(dflag - 1)); - s->base.is_jmp = DISAS_EOB_ONLY; - } - break; - case 0x105: /* syscall */ - /* For Intel SYSCALL is only valid in long mode */ - if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_syscall(tcg_env, cur_insn_len_i32(s)); - /* condition codes are modified only in long mode */ - if (LMA(s)) { - assume_cc_op(s, CC_OP_EFLAGS); - } - /* TF handling for the syscall insn is different. The TF bit is checked - after the syscall insn completes. This allows #DB to not be - generated after one has entered CPL0 if TF is set in FMASK. */ - s->base.is_jmp = DISAS_EOB_RECHECK_TF; - break; - case 0x107: /* sysret */ - /* For Intel SYSRET is only valid in long mode */ - if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { - goto illegal_op; - } - if (!PE(s) || CPL(s) != 0) { - gen_exception_gpf(s); - } else { - gen_helper_sysret(tcg_env, tcg_constant_i32(dflag - 1)); - /* condition codes are modified only in long mode */ - if (LMA(s)) { - assume_cc_op(s, CC_OP_EFLAGS); - } - /* TF handling for the sysret insn is different. The TF bit is - checked after the sysret insn completes. This allows #DB to be - generated "as if" the syscall insn in userspace has just - completed. */ - s->base.is_jmp = DISAS_EOB_RECHECK_TF; - } - break; - case 0x1a2: /* cpuid */ - gen_update_cc_op(s); - gen_update_eip_cur(s); - gen_helper_cpuid(tcg_env); - break; case 0x100: modrm = x86_ldub_code(env, s); mod = (modrm >> 6) & 3; @@ -3967,39 +3595,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } break; - case 0x108: /* invd */ - case 0x109: /* wbinvd; wbnoinvd with REPZ prefix */ - if (check_cpl0(s)) { - gen_svm_check_intercept(s, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD); - /* nothing to do */ - } - break; - case 0x102: /* lar */ - case 0x103: /* lsl */ - { - TCGLabel *label1; - TCGv t0; - if (!PE(s) || VM86(s)) - goto illegal_op; - ot = dflag != MO_16 ? 
MO_32 : MO_16; - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - gen_ld_modrm(env, s, modrm, MO_16); - t0 = tcg_temp_new(); - gen_update_cc_op(s); - if (b == 0x102) { - gen_helper_lar(t0, tcg_env, s->T0); - } else { - gen_helper_lsl(t0, tcg_env, s->T0); - } - tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z); - label1 = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1); - gen_op_mov_reg_v(s, ot, reg, t0); - gen_set_label(label1); - set_cc_op(s, CC_OP_EFLAGS); - } - break; case 0x11a: modrm = x86_ldub_code(env, s); if (s->flags & HF_MPX_EN_MASK) { @@ -4191,311 +3786,6 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) } gen_nop_modrm(env, s, modrm); break; - - case 0x120: /* mov reg, crN */ - case 0x122: /* mov crN, reg */ - if (!check_cpl0(s)) { - break; - } - modrm = x86_ldub_code(env, s); - /* - * Ignore the mod bits (assume (modrm&0xc0)==0xc0). - * AMD documentation (24594.pdf) and testing of Intel 386 and 486 - * processors all show that the mod bits are assumed to be 1's, - * regardless of actual values. - */ - rm = (modrm & 7) | REX_B(s); - reg = ((modrm >> 3) & 7) | REX_R(s); - switch (reg) { - case 0: - if ((prefixes & PREFIX_LOCK) && - (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) { - reg = 8; - } - break; - case 2: - case 3: - case 4: - case 8: - break; - default: - goto unknown_op; - } - ot = (CODE64(s) ? MO_64 : MO_32); - - translator_io_start(&s->base); - if (b & 2) { - gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg); - gen_op_mov_v_reg(s, ot, s->T0, rm); - gen_helper_write_crN(tcg_env, tcg_constant_i32(reg), s->T0); - s->base.is_jmp = DISAS_EOB_NEXT; - } else { - gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg); - gen_helper_read_crN(s->T0, tcg_env, tcg_constant_i32(reg)); - gen_op_mov_reg_v(s, ot, rm, s->T0); - } - break; - - case 0x121: /* mov reg, drN */ - case 0x123: /* mov drN, reg */ - if (check_cpl0(s)) { - modrm = x86_ldub_code(env, s); - /* Ignore the mod bits (assume (modrm&0xc0)==0xc0). - * AMD documentation (24594.pdf) and testing of - * intel 386 and 486 processors all show that the mod bits - * are assumed to be 1's, regardless of actual values. 
- */ - rm = (modrm & 7) | REX_B(s); - reg = ((modrm >> 3) & 7) | REX_R(s); - if (CODE64(s)) - ot = MO_64; - else - ot = MO_32; - if (reg >= 8) { - goto illegal_op; - } - if (b & 2) { - gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg); - gen_op_mov_v_reg(s, ot, s->T0, rm); - tcg_gen_movi_i32(s->tmp2_i32, reg); - gen_helper_set_dr(tcg_env, s->tmp2_i32, s->T0); - s->base.is_jmp = DISAS_EOB_NEXT; - } else { - gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg); - tcg_gen_movi_i32(s->tmp2_i32, reg); - gen_helper_get_dr(s->T0, tcg_env, s->tmp2_i32); - gen_op_mov_reg_v(s, ot, rm, s->T0); - } - } - break; - case 0x106: /* clts */ - if (check_cpl0(s)) { - gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0); - gen_helper_clts(tcg_env); - /* abort block because static cpu state changed */ - s->base.is_jmp = DISAS_EOB_NEXT; - } - break; - /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */ - case 0x1ae: - modrm = x86_ldub_code(env, s); - switch (modrm) { - CASE_MODRM_MEM_OP(0): /* fxsave */ - if (!(s->cpuid_features & CPUID_FXSR) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_lea_modrm(env, s, modrm); - gen_helper_fxsave(tcg_env, s->A0); - break; - - CASE_MODRM_MEM_OP(1): /* fxrstor */ - if (!(s->cpuid_features & CPUID_FXSR) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_lea_modrm(env, s, modrm); - gen_helper_fxrstor(tcg_env, s->A0); - break; - - CASE_MODRM_MEM_OP(2): /* ldmxcsr */ - if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { - goto illegal_op; - } - if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); - break; - - CASE_MODRM_MEM_OP(3): /* stmxcsr */ - if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) { - goto illegal_op; - } - if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX); - break; - } - gen_helper_update_mxcsr(tcg_env); - gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr)); - gen_op_st_v(s, MO_32, s->T0, s->A0); - break; - - CASE_MODRM_MEM_OP(4): /* xsave */ - if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 - || (prefixes & (PREFIX_LOCK | PREFIX_DATA - | PREFIX_REPZ | PREFIX_REPNZ))) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], - cpu_regs[R_EDX]); - gen_helper_xsave(tcg_env, s->A0, s->tmp1_i64); - break; - - CASE_MODRM_MEM_OP(5): /* xrstor */ - if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 - || (prefixes & (PREFIX_LOCK | PREFIX_DATA - | PREFIX_REPZ | PREFIX_REPNZ))) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], - cpu_regs[R_EDX]); - gen_helper_xrstor(tcg_env, s->A0, s->tmp1_i64); - /* XRSTOR is how MPX is enabled, which changes how - we translate. Thus we need to end the TB. 
*/ - s->base.is_jmp = DISAS_EOB_NEXT; - break; - - CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */ - if (prefixes & PREFIX_LOCK) { - goto illegal_op; - } - if (prefixes & PREFIX_DATA) { - /* clwb */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) { - goto illegal_op; - } - gen_nop_modrm(env, s, modrm); - } else { - /* xsaveopt */ - if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0 - || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0 - || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], - cpu_regs[R_EDX]); - gen_helper_xsaveopt(tcg_env, s->A0, s->tmp1_i64); - } - break; - - CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */ - if (prefixes & PREFIX_LOCK) { - goto illegal_op; - } - if (prefixes & PREFIX_DATA) { - /* clflushopt */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) { - goto illegal_op; - } - } else { - /* clflush */ - if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) - || !(s->cpuid_features & CPUID_CLFLUSH)) { - goto illegal_op; - } - } - gen_nop_modrm(env, s, modrm); - break; - - case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */ - case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */ - case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */ - case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */ - if (CODE64(s) - && (prefixes & PREFIX_REPZ) - && !(prefixes & PREFIX_LOCK) - && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) { - TCGv base, treg, src, dst; - - /* Preserve hflags bits by testing CR4 at runtime. */ - tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK); - gen_helper_cr4_testbit(tcg_env, s->tmp2_i32); - - base = cpu_seg_base[modrm & 8 ? R_GS : R_FS]; - treg = cpu_regs[(modrm & 7) | REX_B(s)]; - - if (modrm & 0x10) { - /* wr*base */ - dst = base, src = treg; - } else { - /* rd*base */ - dst = treg, src = base; - } - - if (s->dflag == MO_32) { - tcg_gen_ext32u_tl(dst, src); - } else { - tcg_gen_mov_tl(dst, src); - } - break; - } - goto unknown_op; - - case 0xf8 ... 0xff: /* sfence */ - if (!(s->cpuid_features & CPUID_SSE) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC); - break; - case 0xe8 ... 0xef: /* lfence */ - if (!(s->cpuid_features & CPUID_SSE) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC); - break; - case 0xf0 ... 0xf7: /* mfence */ - if (!(s->cpuid_features & CPUID_SSE2) - || (prefixes & PREFIX_LOCK)) { - goto illegal_op; - } - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC); - break; - - default: - goto unknown_op; - } - break; - - case 0x1aa: /* rsm */ - gen_svm_check_intercept(s, SVM_EXIT_RSM); - if (!(s->flags & HF_SMM_MASK)) - goto illegal_op; -#ifdef CONFIG_USER_ONLY - /* we should not be in SMM mode */ - g_assert_not_reached(); -#else - gen_helper_rsm(tcg_env); - assume_cc_op(s, CC_OP_EFLAGS); -#endif /* CONFIG_USER_ONLY */ - s->base.is_jmp = DISAS_EOB_ONLY; - break; - case 0x1b8: /* SSE4.2 popcnt */ - if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) != - PREFIX_REPZ) - goto illegal_op; - if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT)) - goto illegal_op; - - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | REX_R(s); - - ot = dflag; - gen_ld_modrm(env, s, modrm, ot); - gen_extu(ot, s->T0); - tcg_gen_mov_tl(cpu_cc_src, s->T0); - tcg_gen_ctpop_tl(s->T0, s->T0); - gen_op_mov_reg_v(s, ot, reg, s->T0); - - set_cc_op(s, CC_OP_POPCNT); - break; default: g_assert_not_reached(); }