diff --git a/bochs/CHANGES b/bochs/CHANGES index d3f9bc740..26cde6e81 100644 --- a/bochs/CHANGES +++ b/bochs/CHANGES @@ -34,6 +34,12 @@ Changes in 2.1 (July ??, 2003): RSQRTSS_VssWss, RSQRTPS_VpsWps, RCPPS_VpsWps, RCPSS_VssWss +[+] Add emulation of SSE3 (PNI) instructions + + Currently we have only 3 PNI opcodes still not implemented: + + FISTTP m16int, FISTTP m32int, FISTTP m64int + [+] Fixed: Unallowed lock prefix cases must cause #UD exception [+] Fixed FXSAVE/FXRSTOR instructions @@ -41,7 +47,7 @@ Changes in 2.1 (July ??, 2003): [+] Fixed fetchdecode bug caused #UD in SYSENTER/SYSEXIT instructions in 32bit mode. -[+] Fixed fetchdecode bug caused wrong decoding of opcodes containing +[+] Fixed fetchdecode64 bug caused wrong decoding of opcodes containing BxImmediate_IvIw or BxImmediate_IwIb in x86-64. [+] Hundreds of bugfixes in FPU emulation after checking of the emulation diff --git a/bochs/config.h.in b/bochs/config.h.in index fb1718f47..3aaa3a1ed 100644 --- a/bochs/config.h.in +++ b/bochs/config.h.in @@ -708,6 +708,7 @@ typedef #define BX_SUPPORT_3DNOW 0 #define BX_SUPPORT_SSE 0 #define BX_SUPPORT_DAZ 0 +#define BX_SUPPORT_PNI 0 #define BX_SUPPORT_SEP 0 #define BX_SUPPORT_4MEG_PAGES 0 @@ -764,6 +765,10 @@ typedef #error "SSE cannot be compiled without FPU+MMX support" #endif +#if (BX_SUPPORT_PNI && !BX_SUPPORT_SSE) +#error "PNI cannot be compiled without SSE support" +#endif + #if (BX_CPU_LEVEL<6 && BX_SUPPORT_SEP) #error SYSENTER/SYSEXIT only supported with CPU_LEVEL >= 6 #endif diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index 3a6de36df..948afac37 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: cpu.h,v 1.144 2003-08-28 19:25:23 sshwarts Exp $ +// $Id: cpu.h,v 1.145 2003-08-29 21:20:52 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. 
@@ -168,19 +168,16 @@ #define BX_READ_8BIT_REGx(index,extended) ((((index) < 4) || (extended)) ? \ (BX_CPU_THIS_PTR gen_reg[index].word.byte.rl) : \ (BX_CPU_THIS_PTR gen_reg[(index)-4].word.byte.rh)) -#define BX_READ_16BIT_REG(index) (BX_CPU_THIS_PTR gen_reg[index].word.rx) -#define BX_READ_32BIT_REG(index) (BX_CPU_THIS_PTR gen_reg[index].dword.erx) #define BX_READ_64BIT_REG(index) (BX_CPU_THIS_PTR gen_reg[index].rrx) - #else - #define BX_READ_8BIT_REG(index) (((index) < 4) ? \ (BX_CPU_THIS_PTR gen_reg[index].word.byte.rl) : \ (BX_CPU_THIS_PTR gen_reg[(index)-4].word.byte.rh)) #define BX_READ_8BIT_REGx(index,ext) BX_READ_8BIT_REG(index) +#endif + #define BX_READ_16BIT_REG(index) (BX_CPU_THIS_PTR gen_reg[index].word.rx) #define BX_READ_32BIT_REG(index) (BX_CPU_THIS_PTR gen_reg[index].dword.erx) -#endif #define BX_READ_16BIT_BASE_REG(var, index) {\ var = *BX_CPU_THIS_PTR _16bit_base_reg[index];\ @@ -191,6 +188,7 @@ } #if BX_SUPPORT_X86_64 + #define BX_WRITE_8BIT_REGx(index, extended, val) {\ if (((index) < 4) || (extended)) \ BX_CPU_THIS_PTR gen_reg[index].word.byte.rl = val; \ @@ -231,6 +229,7 @@ #define BX_WRITE_32BIT_REGZ(index, val) {\ BX_CPU_THIS_PTR gen_reg[index].dword.erx = (Bit32u) val; \ } + #endif #ifndef CPL @@ -2160,6 +2159,19 @@ union { BX_SMF void PSLLDQ_WdqIb(bxInstruction_c *i); /* SSE2 */ + /* PNI */ + BX_SMF void MOVDDUP_VpdWq(bxInstruction_c *i); + BX_SMF void MOVSLDUP_VpsWps(bxInstruction_c *i); + BX_SMF void MOVSHDUP_VpsWps(bxInstruction_c *i); + BX_SMF void HADDPD_VpdWpd(bxInstruction_c *i); + BX_SMF void HADDPS_VpsWps(bxInstruction_c *i); + BX_SMF void HSUBPD_VpdWpd(bxInstruction_c *i); + BX_SMF void HSUBPS_VpsWps(bxInstruction_c *i); + BX_SMF void ADDSUBPD_VpdWpd(bxInstruction_c *i); + BX_SMF void ADDSUBPS_VpsWps(bxInstruction_c *i); + BX_SMF void LDDQU_VdqMdq(bxInstruction_c *i); + /* PNI */ + #if BX_SUPPORT_FPU BX_SMF void fpu_execute(bxInstruction_c *i); BX_SMF void fpu_init(void); diff --git a/bochs/cpu/ctrl_xfer32.cc 
b/bochs/cpu/ctrl_xfer32.cc index 737558e69..6e43d2625 100644 --- a/bochs/cpu/ctrl_xfer32.cc +++ b/bochs/cpu/ctrl_xfer32.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: ctrl_xfer32.cc,v 1.27 2003-08-17 18:15:04 akrisak Exp $ +// $Id: ctrl_xfer32.cc,v 1.28 2003-08-29 21:20:52 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -592,7 +592,6 @@ done: BX_CPU_C::IRET32(bxInstruction_c *i) { BailBigRSP("IRET32"); -// Bit32u eip, ecs_raw, eflags; invalidate_prefetch_q(); @@ -619,19 +618,9 @@ BailBigRSP("IRET32"); goto done; #endif -// BX_ERROR(("IRET32 called when you're not in vm8086 mode or protected mode.")); BX_ERROR(("IRET32 may not be implemented right.")); BX_PANIC(("Please report that you have found a test case for BX_CPU_C::IRET32.")); -// pop_32(&eip); -// pop_32(&ecs_raw); -// pop_32(&eflags); - -// load_seg_reg(&BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS], (Bit16u) ecs_raw); -// EIP = eip; - //FIXME: this should do (eflags & 0x257FD5) | (EFLAGS | 0x1A0000) -// write_eflags(eflags, /* change IOPL? */ 1, /* change IF? */ 1, 0, 1); - done: BX_INSTR_FAR_BRANCH(BX_CPU_ID, BX_INSTR_IS_IRET, BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].selector.value, EIP); diff --git a/bochs/cpu/fetchdecode.cc b/bochs/cpu/fetchdecode.cc index a3f717480..b4e01f33a 100644 --- a/bochs/cpu/fetchdecode.cc +++ b/bochs/cpu/fetchdecode.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: fetchdecode.cc,v 1.57 2003-08-28 19:25:23 sshwarts Exp $ +// $Id: fetchdecode.cc,v 1.58 2003-08-29 21:20:52 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. 
@@ -863,8 +863,8 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F 79 */ { 0, &BX_CPU_C::BxError }, /* 0F 7A */ { 0, &BX_CPU_C::BxError }, /* 0F 7B */ { 0, &BX_CPU_C::BxError }, - /* 0F 7C */ { 0, &BX_CPU_C::BxError }, - /* 0F 7D */ { 0, &BX_CPU_C::BxError }, + /* 0F 7C */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7c }, + /* 0F 7D */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7d }, /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, /* 0F 80 */ { BxImmediate_BrOff16, &BX_CPU_C::JCC_Jw }, @@ -947,7 +947,7 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F CD */ { 0, &BX_CPU_C::BSWAP_EBP }, /* 0F CE */ { 0, &BX_CPU_C::BSWAP_ESI }, /* 0F CF */ { 0, &BX_CPU_C::BSWAP_EDI }, - /* 0F D0 */ { 0, &BX_CPU_C::BxError }, + /* 0F D0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd0 }, /* 0F D1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd1 }, /* 0F D2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd2 }, /* 0F D3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd3 }, @@ -979,7 +979,7 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F ED */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fed }, /* 0F EE */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fee }, /* 0F EF */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fef }, - /* 0F F0 */ { 0, &BX_CPU_C::BxError }, + /* 0F F0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff0 }, /* 0F F1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff1 }, /* 0F F2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff2 }, /* 0F F3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff3 }, @@ -1397,8 +1397,8 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F 79 */ { 0, &BX_CPU_C::BxError }, /* 0F 7A */ { 0, &BX_CPU_C::BxError }, /* 0F 7B */ { 0, &BX_CPU_C::BxError }, - /* 0F 7C */ { 0, &BX_CPU_C::BxError }, - /* 0F 7D */ { 0, &BX_CPU_C::BxError }, + /* 0F 7C */ { 
BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7c }, + /* 0F 7D */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7d }, /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, /* 0F 80 */ { BxImmediate_BrOff32, &BX_CPU_C::JCC_Jd }, @@ -1481,7 +1481,7 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F CD */ { 0, &BX_CPU_C::BSWAP_EBP }, /* 0F CE */ { 0, &BX_CPU_C::BSWAP_ESI }, /* 0F CF */ { 0, &BX_CPU_C::BSWAP_EDI }, - /* 0F D0 */ { 0, &BX_CPU_C::BxError }, + /* 0F D0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd0 }, /* 0F D1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd1 }, /* 0F D2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd2 }, /* 0F D3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd3 }, @@ -1513,7 +1513,7 @@ static BxOpcodeInfo_t BxOpcodeInfo[512*2] = { /* 0F ED */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fed }, /* 0F EE */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fee }, /* 0F EF */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fef }, - /* 0F F0 */ { 0, &BX_CPU_C::BxError }, + /* 0F F0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff0 }, /* 0F F1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff1 }, /* 0F F2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff2 }, /* 0F F3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff3 }, diff --git a/bochs/cpu/fetchdecode.h b/bochs/cpu/fetchdecode.h index 8f6e95d92..0b096def6 100755 --- a/bochs/cpu/fetchdecode.h +++ b/bochs/cpu/fetchdecode.h @@ -379,10 +379,10 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_0f11[4] = { }; static BxOpcodeInfo_t BxOpcodeGroupSSE_0f12[4] = { - /* -- */ { 0, &BX_CPU_C::MOVLPS_VpsMq }, - /* 66 */ { 0, &BX_CPU_C::MOVLPD_VsdMq }, - /* F2 */ { 0, &BX_CPU_C::BxError }, - /* F3 */ { 0, &BX_CPU_C::BxError } + /* -- */ { 0, &BX_CPU_C::MOVLPS_VpsMq }, + /* 66 */ { 0, &BX_CPU_C::MOVLPD_VsdMq }, + /* F2 */ { 0, 
&BX_CPU_C::MOVDDUP_VpdWq }, // PNI + /* F3 */ { 0, &BX_CPU_C::MOVSLDUP_VpsWps } // PNI }; static BxOpcodeInfo_t BxOpcodeGroupSSE_0f13[4] = { @@ -410,7 +410,7 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_0f16[4] = { /* -- */ { 0, &BX_CPU_C::MOVHPS_VpsMq }, /* 66 */ { 0, &BX_CPU_C::MOVHPD_VsdMq }, /* F2 */ { 0, &BX_CPU_C::BxError }, - /* F3 */ { 0, &BX_CPU_C::BxError } + /* F3 */ { 0, &BX_CPU_C::MOVSHDUP_VpsWps } // PNI }; static BxOpcodeInfo_t BxOpcodeGroupSSE_0f17[4] = { @@ -697,7 +697,7 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_0f6f[4] = { /* -- */ { 0, &BX_CPU_C::MOVQ_PqQq }, /* 66 */ { 0, &BX_CPU_C::MOVDQA_VdqWdq }, /* F2 */ { 0, &BX_CPU_C::BxError }, - /* F3 */ { 0, &BX_CPU_C::MOVDQU_VdqWdq }, + /* F3 */ { 0, &BX_CPU_C::MOVDQU_VdqWdq } }; static BxOpcodeInfo_t BxOpcodeGroupSSE_0f70[4] = { @@ -728,18 +728,32 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_0f76[4] = { /* F3 */ { 0, &BX_CPU_C::BxError } }; +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f7c[4] = { + /* -- */ { 0, &BX_CPU_C::BxError }, + /* 66 */ { 0, &BX_CPU_C::HADDPD_VpdWpd }, // PNI + /* F2 */ { 0, &BX_CPU_C::HADDPS_VpsWps }, // PNI + /* F3 */ { 0, &BX_CPU_C::BxError } + }; + +static BxOpcodeInfo_t BxOpcodeGroupSSE_0f7d[4] = { + /* -- */ { 0, &BX_CPU_C::BxError }, + /* 66 */ { 0, &BX_CPU_C::HSUBPD_VpdWpd }, // PNI + /* F2 */ { 0, &BX_CPU_C::HSUBPS_VpsWps }, // PNI + /* F3 */ { 0, &BX_CPU_C::BxError } + }; + static BxOpcodeInfo_t BxOpcodeGroupSSE_0f7e[4] = { /* -- */ { 0, &BX_CPU_C::MOVD_EdPd }, /* 66 */ { 0, &BX_CPU_C::MOVD_EdVd }, /* F2 */ { 0, &BX_CPU_C::BxError }, - /* F3 */ { 0, &BX_CPU_C::MOVQ_VqWq }, + /* F3 */ { 0, &BX_CPU_C::MOVQ_VqWq } }; static BxOpcodeInfo_t BxOpcodeGroupSSE_0f7f[4] = { /* -- */ { 0, &BX_CPU_C::MOVQ_QqPq }, /* 66 */ { 0, &BX_CPU_C::MOVDQA_WdqVdq }, /* F2 */ { 0, &BX_CPU_C::BxError }, - /* F3 */ { 0, &BX_CPU_C::MOVDQU_WdqVdq }, + /* F3 */ { 0, &BX_CPU_C::MOVDQU_WdqVdq } }; static BxOpcodeInfo_t BxOpcodeGroupSSE_0fc2[4] = { @@ -777,6 +791,13 @@ static BxOpcodeInfo_t 
BxOpcodeGroupSSE_0fc6[4] = { /* F3 */ { 0, &BX_CPU_C::BxError } }; +static BxOpcodeInfo_t BxOpcodeGroupSSE_0fd0[4] = { + /* -- */ { 0, &BX_CPU_C::BxError }, + /* 66 */ { 0, &BX_CPU_C::ADDSUBPD_VpdWpd }, // PNI + /* F2 */ { 0, &BX_CPU_C::ADDSUBPS_VpsWps }, // PNI + /* F3 */ { 0, &BX_CPU_C::BxError } + }; + static BxOpcodeInfo_t BxOpcodeGroupSSE_0fd1[4] = { /* -- */ { 0, &BX_CPU_C::PSRLW_PqQq }, /* 66 */ { 0, &BX_CPU_C::PSRLW_VdqWdq }, @@ -816,7 +837,7 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_0fd6[4] = { /* -- */ { 0, &BX_CPU_C::BxError }, /* 66 */ { 0, &BX_CPU_C::MOVQ_WqVq }, /* F2 */ { 0, &BX_CPU_C::MOVDQ2Q_PqVRq }, - /* F3 */ { 0, &BX_CPU_C::MOVQ2DQ_VdqQq }, + /* F3 */ { 0, &BX_CPU_C::MOVQ2DQ_VdqQq } }; static BxOpcodeInfo_t BxOpcodeGroupSSE_0fd7[4] = { @@ -994,6 +1015,13 @@ static BxOpcodeInfo_t BxOpcodeGroupSSE_0fef[4] = { /* F3 */ { 0, &BX_CPU_C::BxError } }; +static BxOpcodeInfo_t BxOpcodeGroupSSE_0ff0[4] = { + /* -- */ { 0, &BX_CPU_C::BxError }, + /* 66 */ { 0, &BX_CPU_C::BxError }, + /* F2 */ { 0, &BX_CPU_C::LDDQU_VdqMdq }, // PNI + /* F3 */ { 0, &BX_CPU_C::BxError } + }; + static BxOpcodeInfo_t BxOpcodeGroupSSE_0ff1[4] = { /* -- */ { 0, &BX_CPU_C::PSLLW_PqQq }, /* 66 */ { 0, &BX_CPU_C::PSLLW_VdqWdq }, diff --git a/bochs/cpu/fetchdecode64.cc b/bochs/cpu/fetchdecode64.cc index 46e8b54f8..3d283afa3 100644 --- a/bochs/cpu/fetchdecode64.cc +++ b/bochs/cpu/fetchdecode64.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: fetchdecode64.cc,v 1.55 2003-08-28 19:25:23 sshwarts Exp $ +// $Id: fetchdecode64.cc,v 1.56 2003-08-29 21:20:52 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. 
@@ -954,10 +954,10 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 79 */ { 0, &BX_CPU_C::BxError }, /* 0F 7A */ { 0, &BX_CPU_C::BxError }, /* 0F 7B */ { 0, &BX_CPU_C::BxError }, - /* 0F 7C */ { 0, &BX_CPU_C::BxError }, - /* 0F 7D */ { 0, &BX_CPU_C::BxError }, - /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, - /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, + /* 0F 7C */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7c }, + /* 0F 7D */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7d }, + /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, + /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, /* 0F 80 */ { BxImmediate_BrOff16, &BX_CPU_C::JCC_Jq }, /* 0F 81 */ { BxImmediate_BrOff16, &BX_CPU_C::JCC_Jq }, /* 0F 82 */ { BxImmediate_BrOff16, &BX_CPU_C::JCC_Jq }, @@ -1038,7 +1038,7 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F CD */ { 0, &BX_CPU_C::BSWAP_EBP }, /* 0F CE */ { 0, &BX_CPU_C::BSWAP_ESI }, /* 0F CF */ { 0, &BX_CPU_C::BSWAP_EDI }, - /* 0F D0 */ { 0, &BX_CPU_C::BxError }, + /* 0F D0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd0 }, /* 0F D1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd1 }, /* 0F D2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd2 }, /* 0F D3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd3 }, @@ -1070,7 +1070,7 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F ED */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fed }, /* 0F EE */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fee }, /* 0F EF */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fef }, - /* 0F F0 */ { 0, &BX_CPU_C::BxError }, + /* 0F F0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff0 }, /* 0F F1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff1 }, /* 0F F2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff2 }, /* 0F F3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff3 }, @@ -1478,10 
+1478,10 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 79 */ { 0, &BX_CPU_C::BxError }, /* 0F 7A */ { 0, &BX_CPU_C::BxError }, /* 0F 7B */ { 0, &BX_CPU_C::BxError }, - /* 0F 7C */ { 0, &BX_CPU_C::BxError }, - /* 0F 7D */ { 0, &BX_CPU_C::BxError }, - /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, - /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, + /* 0F 7C */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7c }, + /* 0F 7D */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7d }, + /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, + /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, /* 0F 80 */ { BxImmediate_BrOff32, &BX_CPU_C::JCC_Jq }, /* 0F 81 */ { BxImmediate_BrOff32, &BX_CPU_C::JCC_Jq }, /* 0F 82 */ { BxImmediate_BrOff32, &BX_CPU_C::JCC_Jq }, @@ -1562,7 +1562,7 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F CD */ { 0, &BX_CPU_C::BSWAP_EBP }, /* 0F CE */ { 0, &BX_CPU_C::BSWAP_ESI }, /* 0F CF */ { 0, &BX_CPU_C::BSWAP_EDI }, - /* 0F D0 */ { 0, &BX_CPU_C::BxError }, + /* 0F D0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd0 }, /* 0F D1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd1 }, /* 0F D2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd2 }, /* 0F D3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd3 }, @@ -1594,7 +1594,7 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F ED */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fed }, /* 0F EE */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fee }, /* 0F EF */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fef }, - /* 0F F0 */ { 0, &BX_CPU_C::BxError }, + /* 0F F0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff0 }, /* 0F F1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff1 }, /* 0F F2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff2 }, /* 0F F3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff3 }, @@ -1999,10 +1999,10 
@@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F 79 */ { 0, &BX_CPU_C::BxError }, /* 0F 7A */ { 0, &BX_CPU_C::BxError }, /* 0F 7B */ { 0, &BX_CPU_C::BxError }, - /* 0F 7C */ { 0, &BX_CPU_C::BxError }, - /* 0F 7D */ { 0, &BX_CPU_C::BxError }, - /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, - /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, + /* 0F 7C */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7c }, + /* 0F 7D */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7d }, + /* 0F 7E */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7e }, + /* 0F 7F */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0f7f }, /* 0F 80 */ { BxImmediate_BrOff32, &BX_CPU_C::JCC_Jq }, /* 0F 81 */ { BxImmediate_BrOff32, &BX_CPU_C::JCC_Jq }, /* 0F 82 */ { BxImmediate_BrOff32, &BX_CPU_C::JCC_Jq }, @@ -2083,7 +2083,7 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F CD */ { 0, &BX_CPU_C::BSWAP_RBP }, /* 0F CE */ { 0, &BX_CPU_C::BSWAP_RSI }, /* 0F CF */ { 0, &BX_CPU_C::BSWAP_RDI }, - /* 0F D0 */ { 0, &BX_CPU_C::BxError }, + /* 0F D0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd0 }, /* 0F D1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd1 }, /* 0F D2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd2 }, /* 0F D3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fd3 }, @@ -2115,7 +2115,7 @@ static BxOpcodeInfo_t BxOpcodeInfo64[512*3] = { /* 0F ED */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fed }, /* 0F EE */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fee }, /* 0F EF */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0fef }, - /* 0F F0 */ { 0, &BX_CPU_C::BxError }, + /* 0F F0 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff0 }, /* 0F F1 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff1 }, /* 0F F2 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff2 }, /* 0F F3 */ { BxAnother | BxPrefixSSE, NULL, BxOpcodeGroupSSE_0ff3 }, diff --git 
a/bochs/cpu/proc_ctrl.cc b/bochs/cpu/proc_ctrl.cc index 3a3f265cf..faf6ae226 100644 --- a/bochs/cpu/proc_ctrl.cc +++ b/bochs/cpu/proc_ctrl.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: proc_ctrl.cc,v 1.73 2003-08-03 16:44:53 sshwarts Exp $ +// $Id: proc_ctrl.cc,v 1.74 2003-08-29 21:20:52 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -73,7 +73,6 @@ BX_CPU_C::HLT(bxInstruction_c *i) BX_PANIC(("HALT instruction encountered in the BIOS ROM")); if (CPL!=0) { -// BX_INFO(("HLT(): CPL!=0")); exception(BX_GP_EXCEPTION, 0, 0); return; } @@ -889,11 +888,9 @@ BX_CPU_C::MOV_CqRq(bxInstruction_c *i) // Protected mode: #GP(0) if attempt to write a 1 to // any reserved bit of CR4 - BX_INFO(("MOV_CqRq: ignoring write to CR4 of 0x%08x", - val_64)); + BX_INFO(("MOV_CqRq: ignoring write to CR4 of 0x%08x", val_64)); if (val_64) { - BX_INFO(("MOV_CqRq: (CR4) write of 0x%08x not supported!", - val_64)); + BX_INFO(("MOV_CqRq: (CR4) write of 0x%08x not supported!", val_64)); } // Only allow writes of 0 to CR4 for now. 
// Writes to bits in CR4 should not be 1s as CPUID @@ -1343,7 +1340,7 @@ BX_PANIC(("LOADALL: handle CR0.val32")); static inline Bit32u get_std_cpuid_features() { - unsigned features; + Bit32u features; // EAX[3:0] Stepping ID // EAX[7:4] Model: starts at 1 @@ -1351,7 +1348,10 @@ get_std_cpuid_features() // EAX[13:12] Type: 0=OEM,1=overdrive,2=dual cpu,3=reserved // EAX[31:14] Reserved // EBX: Reserved (0) - // ECX: Reserved (0) + // ECX: Feature Flags::Extended + // [0:0] PNI + // [3:3] MONITOR/MWAIT + // [7:7] Enhanced Intel SpeedStep Technology // EDX: Feature Flags // [0:0] FPU on chip // [1:1] VME: Virtual-8086 Mode enhancements @@ -1409,21 +1409,19 @@ get_std_cpuid_features() #if BX_SUPPORT_4MEG_PAGES features |= (1<<3); // Support Page-Size Extension (4M pages) #endif - #if BX_SupportGlobalPages features |= (1<<13); // Support Global pages. #endif - #if BX_SupportPAE features |= (1<<6); // Support PAE. #endif #if BX_SUPPORT_X86_64 - features |= (1<<5); // AMD specific MSR's + features |= (1<<5); // AMD specific MSR's #endif #if BX_SUPPORT_SEP - features |= (1<<11); // SYSENTER/SYSEXIT + features |= (1<<11); // SYSENTER/SYSEXIT #endif return features; @@ -1476,15 +1474,15 @@ BX_CPU_C::CPUID(bxInstruction_c *i) #elif BX_CPU_LEVEL == 5 family = 5; - model = 1; // Pentium (60,66) + model = 1; // Pentium (60,66) stepping = 3; // ??? #elif BX_CPU_LEVEL == 6 family = 6; #if BX_SUPPORT_X86_64 - model = 2; // Hammer returns what? + model = 2; // Hammer returns what? #else - model = 1; // Pentium Pro + model = 1; // Pentium Pro #endif stepping = 3; // ??? 
#else @@ -1493,6 +1491,9 @@ BX_CPU_C::CPUID(bxInstruction_c *i) RAX = (family <<8) | (model<<4) | stepping; RBX = RCX = 0; // reserved +#if BX_SUPPORT_PNI + RCX = 1; // report PNI +#endif RDX = get_std_cpuid_features (); break; @@ -1844,8 +1845,8 @@ BX_CPU_C::RDMSR(bxInstruction_c *i) default: #if BX_IGNORE_BAD_MSR - BX_ERROR(("RDMSR: Unknown register %#x", ECX)); - return; + BX_ERROR(("RDMSR: Unknown register %#x", ECX)); + return; #else BX_PANIC(("RDMSR: Unknown register %#x", ECX)); #endif @@ -1972,7 +1973,6 @@ BX_CPU_C::WRMSR(bxInstruction_c *i) do_exception: exception(BX_GP_EXCEPTION, 0, 0); - } void @@ -2099,24 +2099,21 @@ BX_CPU_C::SYSEXIT (bxInstruction_c *i) } #if BX_SUPPORT_X86_64 - void -BX_CPU_C::SWAPGS(bxInstruction_c *i) +void BX_CPU_C::SWAPGS(bxInstruction_c *i) { Bit64u temp_GS_base; - if(CPL != 0) { + if(CPL != 0) exception(BX_GP_EXCEPTION, 0, 0); - } + temp_GS_base = MSR_GSBASE; MSR_GSBASE = MSR_KERNELGSBASE; MSR_KERNELGSBASE = temp_GS_base; - } #endif #if BX_X86_DEBUGGER - Bit32u -BX_CPU_C::hwdebug_compare(Bit32u laddr_0, unsigned size, +Bit32u BX_CPU_C::hwdebug_compare(Bit32u laddr_0, unsigned size, unsigned opa, unsigned opb) { // Support x86 hardware debug facilities (DR0..DR7) diff --git a/bochs/cpu/sse_move.cc b/bochs/cpu/sse_move.cc index 4fbf31d63..b33e37398 100644 --- a/bochs/cpu/sse_move.cc +++ b/bochs/cpu/sse_move.cc @@ -299,7 +299,6 @@ void BX_CPU_C::FXRSTOR(bxInstruction_c *i) /* MOVUPS: 0F 10 */ /* MOVUPD: 66 0F 10 */ /* MOVDQU: F3 0F 6F */ - void BX_CPU_C::MOVUPS_VpsWps(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 @@ -327,7 +326,6 @@ void BX_CPU_C::MOVUPS_VpsWps(bxInstruction_c *i) /* MOVUPS: 0F 11 */ /* MOVUPD: 66 0F 11 */ /* MOVDQU: F3 0F 7F */ - void BX_CPU_C::MOVUPS_WpsVps(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 @@ -351,7 +349,6 @@ void BX_CPU_C::MOVUPS_WpsVps(bxInstruction_c *i) /* MOVAPS: 0F 28 */ /* MOVAPD: 66 0F 28 */ /* MOVDQA: F3 0F 6F */ - void BX_CPU_C::MOVAPS_VpsWps(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 
@@ -379,7 +376,6 @@ void BX_CPU_C::MOVAPS_VpsWps(bxInstruction_c *i) /* MOVAPS: 0F 29 */ /* MOVAPD: 66 0F 29 */ /* MOVDQA: F3 0F 7F */ - void BX_CPU_C::MOVAPS_WpsVps(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 @@ -525,7 +521,6 @@ void BX_CPU_C::MOVSD_WsdVsd(bxInstruction_c *i) /* MOVLPS: 0F 12 */ /* MOVLPD: 66 0F 12 */ - void BX_CPU_C::MOVLPS_VpsMq(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 @@ -549,9 +544,94 @@ void BX_CPU_C::MOVLPS_VpsMq(bxInstruction_c *i) #endif } +/* F2 0F 12 */ +void BX_CPU_C::MOVDDUP_VpdWq(bxInstruction_c *i) +{ +#if BX_SUPPORT_PNI + BX_CPU_THIS_PTR prepareSSE(); + Bit64u val64; + BxPackedXmmRegister op; + + if (i->modC0()) + { + val64 = BX_READ_XMM_REG_LO_QWORD(i->rm()); + } + else { + /* pointer, segment address pair */ + read_virtual_qword(i->seg(), RMAddr(i), &val64); + } + + op.xmm64u(0) = val64; + op.xmm64u(1) = val64; + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), op); +#else + BX_INFO(("MOVDDUP_VpdWq: required PNI, use --enable-pni option")); + UndefinedOpcode(i); +#endif +} + +/* F3 0F 12 */ +void BX_CPU_C::MOVSLDUP_VpsWps(bxInstruction_c *i) +{ +#if BX_SUPPORT_PNI + BX_CPU_THIS_PTR prepareSSE(); + BxPackedXmmRegister op, result; + + /* op is a register or memory reference */ + if (i->modC0()) { + op = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op); + } + + result.xmm32u(0) = op.xmm32u(0); + result.xmm32u(1) = op.xmm32u(0); + result.xmm32u(2) = op.xmm32u(2); + result.xmm32u(3) = op.xmm32u(2); + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("MOVSLDUP_VpsWps: required PNI, use --enable-pni option")); + UndefinedOpcode(i); +#endif +} + +/* F3 0F 16 */ +void BX_CPU_C::MOVSHDUP_VpsWps(bxInstruction_c *i) +{ +#if BX_SUPPORT_PNI + BX_CPU_THIS_PTR prepareSSE(); + BxPackedXmmRegister op, result; + + /* op is a register or memory reference */ + if (i->modC0()) { + 
op = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op); + } + + result.xmm32u(0) = op.xmm32u(1); /* odd source dwords are duplicated: result = { s1, s1, s3, s3 } */ + result.xmm32u(1) = op.xmm32u(1); + result.xmm32u(2) = op.xmm32u(3); + result.xmm32u(3) = op.xmm32u(3); + + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), result); +#else + BX_INFO(("MOVSHDUP_VpsWps: required PNI, use --enable-pni option")); + UndefinedOpcode(i); +#endif +} + /* MOVLPS: 0F 13 */ /* MOVLPD: 66 0F 13 */ - void BX_CPU_C::MOVLPS_MqVps(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 @@ -573,7 +653,6 @@ void BX_CPU_C::MOVLPS_MqVps(bxInstruction_c *i) /* MOVHPS: 0F 16 */ /* MOVHPD: 66 0F 16 */ - void BX_CPU_C::MOVHPS_VpsMq(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 @@ -599,7 +678,6 @@ void BX_CPU_C::MOVHPS_VpsMq(bxInstruction_c *i) /* MOVHPS: 0F 17 */ /* MOVHPD: 66 0F 17 */ - void BX_CPU_C::MOVHPS_MqVps(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 @@ -619,6 +697,31 @@ void BX_CPU_C::MOVHPS_MqVps(bxInstruction_c *i) #endif } +/* F2 0F F0 */ +void BX_CPU_C::LDDQU_VdqMdq(bxInstruction_c *i) +{ +#if BX_SUPPORT_PNI + BX_CPU_THIS_PTR prepareSSE(); + + /* source must be memory reference */ + if (i->modC0()) { + BX_INFO(("LDDQU_VdqMdq: must be memory reference")); + UndefinedOpcode(i); + } + + BxPackedXmmRegister op; + + readVirtualDQword(i->seg(), RMAddr(i), (Bit8u *) &op); + /* now write result back to destination */ + BX_WRITE_XMM_REG(i->nnn(), op); + +#else + BX_INFO(("LDDQU_VdqMdq: required PNI, use --enable-pni option")); + UndefinedOpcode(i); +#endif +} + +/* 66 0F F7 */ void BX_CPU_C::MASKMOVDQU_VdqVRdq(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 2 @@ -1349,7 +1452,6 @@ void BX_CPU_C::MOVNTI_MdGd(bxInstruction_c *i) /* MOVNTPS: 0F 2B */ /* MOVNTPD: 66 0F 2B */ /* MOVNTDQ: 66 0F E7 */ - void BX_CPU_C::MOVNTPS_MdqVps(bxInstruction_c *i) { #if BX_SUPPORT_SSE >= 1 diff --git a/bochs/cpu/sse_pfp.cc b/bochs/cpu/sse_pfp.cc index 496ef4e37..aedbc9894 
100644 --- a/bochs/cpu/sse_pfp.cc +++ b/bochs/cpu/sse_pfp.cc @@ -2564,6 +2564,212 @@ void BX_CPU_C::MAXSS_VssWss(bxInstruction_c *i) #endif } +/* + * Opcode: 66 0F 7C + * Add horizontally packed double precision FP in XMM2/MEM from XMM1: result[0] = XMM1[0] + XMM1[1], result[1] = XMM2/MEM[0] + XMM2/MEM[1]. + * Possible floating point exceptions: #I, #D, #O, #U, #P + */ +void BX_CPU_C::HADDPD_VpdWpd(bxInstruction_c *i) +{ +#if BX_SUPPORT_PNI + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + softfloat_status_word_t status_word; + MXCSR_to_softfloat_status_word(status_word, MXCSR); + int rc; + + if (MXCSR.get_DAZ()) + { + op1.xmm64u(0) = handleDAZ(op1.xmm64u(0)); + op1.xmm64u(1) = handleDAZ(op1.xmm64u(1)); + + op2.xmm64u(0) = handleDAZ(op2.xmm64u(0)); + op2.xmm64u(1) = handleDAZ(op2.xmm64u(1)); + } + + result.xmm64u(0) = + float64_add(op1.xmm64u(0), op1.xmm64u(1), status_word); /* low qword: sum of both destination qwords */ + result.xmm64u(1) = + float64_add(op2.xmm64u(0), op2.xmm64u(1), status_word); /* high qword: sum of both source qwords (element 0 + element 1) */ + + BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags); + BX_WRITE_XMM_REG(i->nnn(), result); + +#else + BX_INFO(("HADDPD_VpdWpd: required PNI, use --enable-pni option")); + UndefinedOpcode(i); +#endif +} + +/* + * Opcode: F2 0F 7C + * Add horizontally packed single precision FP in XMM2/MEM from XMM1. 
+ * Possible floating point exceptions: #I, #D, #O, #U, #P + */ +void BX_CPU_C::HADDPS_VpsWps(bxInstruction_c *i) +{ +#if BX_SUPPORT_PNI + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + softfloat_status_word_t status_word; + MXCSR_to_softfloat_status_word(status_word, MXCSR); + int rc; + + if (MXCSR.get_DAZ()) { + op1.xmm32u(0) = handleDAZ(op1.xmm32u(0)); + op1.xmm32u(1) = handleDAZ(op1.xmm32u(1)); + op1.xmm32u(2) = handleDAZ(op1.xmm32u(2)); + op1.xmm32u(3) = handleDAZ(op1.xmm32u(3)); + + op2.xmm32u(0) = handleDAZ(op2.xmm32u(0)); + op2.xmm32u(1) = handleDAZ(op2.xmm32u(1)); + op2.xmm32u(2) = handleDAZ(op2.xmm32u(2)); + op2.xmm32u(3) = handleDAZ(op2.xmm32u(3)); + } + + result.xmm32u(0) = + float32_add(op1.xmm32u(0), op1.xmm32u(1), status_word); + result.xmm32u(1) = + float32_add(op1.xmm32u(2), op1.xmm32u(3), status_word); + result.xmm32u(2) = + float32_add(op2.xmm32u(0), op2.xmm32u(1), status_word); + result.xmm32u(3) = + float32_add(op2.xmm32u(2), op2.xmm32u(3), status_word); + + BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags); + BX_WRITE_XMM_REG(i->nnn(), result); + +#else + BX_INFO(("HADDPS_VpsWps: required PNI, use --enable-pni option")); + UndefinedOpcode(i); +#endif +} + +/* + * Opcode: 66 0F 7D + * Subtract horizontally packed double precision FP in XMM2/MEM from XMM1. 
+ * Possible floating point exceptions: #I, #D, #O, #U, #P + */ +void BX_CPU_C::HSUBPD_VpdWpd(bxInstruction_c *i) +{ +#if BX_SUPPORT_PNI + BX_CPU_THIS_PTR prepareSSE(); + + BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result; + + /* op2 is a register or memory reference */ + if (i->modC0()) { + op2 = BX_READ_XMM_REG(i->rm()); + } + else { + /* pointer, segment address pair */ + readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2); + } + + softfloat_status_word_t status_word; + MXCSR_to_softfloat_status_word(status_word, MXCSR); + int rc; + + if (MXCSR.get_DAZ()) + { + op1.xmm64u(0) = handleDAZ(op1.xmm64u(0)); + op1.xmm64u(1) = handleDAZ(op1.xmm64u(1)); + + op2.xmm64u(0) = handleDAZ(op2.xmm64u(0)); + op2.xmm64u(1) = handleDAZ(op2.xmm64u(1)); + } + + result.xmm64u(0) = + float64_sub(op1.xmm64u(0), op1.xmm64u(1), status_word); /* low qword: destination element 0 - element 1 */ + result.xmm64u(1) = + float64_sub(op2.xmm64u(0), op2.xmm64u(1), status_word); /* high qword: source element 0 - element 1 */ + + BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags); + BX_WRITE_XMM_REG(i->nnn(), result); + +#else + BX_INFO(("HSUBPD_VpdWpd: required PNI, use --enable-pni option")); + UndefinedOpcode(i); +#endif +} + +/* + * Opcode: F2 0F 7D + * Subtract horizontally packed single precision FP in XMM2/MEM from XMM1. 
+ * Possible floating point exceptions: #I, #D, #O, #U, #P
+ */
+void BX_CPU_C::HSUBPS_VpsWps(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareSSE();
+
+  BxPackedXmmRegister op1 = BX_READ_XMM_REG(i->nnn()), op2, result;
+
+  /* op1 is XMM register nnn; op2 is XMM register rm or a memory operand */
+  if (i->modC0()) {
+    op2 = BX_READ_XMM_REG(i->rm());
+  }
+  else {
+    /* memory form: 128 bits read from i->seg() at RMAddr(i) */
+    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
+  }
+
+  softfloat_status_word_t status_word;
+  MXCSR_to_softfloat_status_word(status_word, MXCSR);
+
+  /* with MXCSR.DAZ set, every input element goes through handleDAZ() first */
+  if (MXCSR.get_DAZ()) {
+    op1.xmm32u(0) = handleDAZ(op1.xmm32u(0));
+    op1.xmm32u(1) = handleDAZ(op1.xmm32u(1));
+    op1.xmm32u(2) = handleDAZ(op1.xmm32u(2));
+    op1.xmm32u(3) = handleDAZ(op1.xmm32u(3));
+
+    op2.xmm32u(0) = handleDAZ(op2.xmm32u(0));
+    op2.xmm32u(1) = handleDAZ(op2.xmm32u(1));
+    op2.xmm32u(2) = handleDAZ(op2.xmm32u(2));
+    op2.xmm32u(3) = handleDAZ(op2.xmm32u(3));
+  }
+
+  /*
+   * Horizontal subtract: each result element is (even element - odd element)
+   * of an adjacent pair; op1 fills elements 0-1, op2 fills elements 2-3.
+   */
+  result.xmm32u(0) = float32_sub(op1.xmm32u(0), op1.xmm32u(1), status_word);
+  result.xmm32u(1) = float32_sub(op1.xmm32u(2), op1.xmm32u(3), status_word);
+  result.xmm32u(2) = float32_sub(op2.xmm32u(0), op2.xmm32u(1), status_word);
+  result.xmm32u(3) = float32_sub(op2.xmm32u(2), op2.xmm32u(3), status_word);
+
+  BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
+  BX_WRITE_XMM_REG(i->nnn(), result);
+
+#else
+  BX_INFO(("HSUBPS_VpsWps: required PNI, use --enable-pni option"));
+  UndefinedOpcode(i);
+#endif
+}
+
 /*
  * Opcode: 0F C2
  * Compare packed single precision FP values using Ib as comparison predicate.
@@ -2812,3 +3018,104 @@ void BX_CPU_C::CMPSS_VssWssIb(bxInstruction_c *i)
   UndefinedOpcode(i);
 #endif
 }
+
+/*
+ * Opcode: 66 0F D0
+ * Add/Subtract packed double precision FP numbers from XMM2/MEM to XMM1.
+ * Possible floating point exceptions: #I, #D, #O, #U, #P
+ */
+void BX_CPU_C::ADDSUBPD_VpdWpd(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareSSE();
+
+  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn()), src, out;
+
+  /* the source operand is XMM register rm, or else a memory operand */
+  if (i->modC0()) {
+    src = BX_READ_XMM_REG(i->rm());
+  }
+  else {
+    /* memory form: 128 bits read from i->seg() at RMAddr(i) */
+    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
+  }
+
+  softfloat_status_word_t fp_status;
+  MXCSR_to_softfloat_status_word(fp_status, MXCSR);
+
+  /* with MXCSR.DAZ set, every input element goes through handleDAZ() first */
+  if (MXCSR.get_DAZ()) {
+    dst.xmm64u(0) = handleDAZ(dst.xmm64u(0));
+    dst.xmm64u(1) = handleDAZ(dst.xmm64u(1));
+
+    src.xmm64u(0) = handleDAZ(src.xmm64u(0));
+    src.xmm64u(1) = handleDAZ(src.xmm64u(1));
+  }
+
+  /* element 0 is the difference dst - src, */
+  /* element 1 is the sum dst + src */
+  out.xmm64u(0) = float64_sub(dst.xmm64u(0), src.xmm64u(0), fp_status);
+  out.xmm64u(1) = float64_add(dst.xmm64u(1), src.xmm64u(1), fp_status);
+
+  BX_CPU_THIS_PTR check_exceptionsSSE(fp_status.float_exception_flags);
+  BX_WRITE_XMM_REG(i->nnn(), out);
+
+#else
+  BX_INFO(("ADDSUBPD_VpdWpd: required PNI, use --enable-pni option"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/*
+ * Opcode: F2 0F D0
+ * Add/Subtract packed single precision FP numbers from XMM2/MEM to XMM1.
+ * Possible floating point exceptions: #I, #D, #O, #U, #P
+ */
+void BX_CPU_C::ADDSUBPS_VpsWps(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareSSE();
+
+  BxPackedXmmRegister dst = BX_READ_XMM_REG(i->nnn()), src, out;
+
+  /* the source operand is XMM register rm, or else a memory operand */
+  if (i->modC0()) {
+    src = BX_READ_XMM_REG(i->rm());
+  }
+  else {
+    /* memory form: 128 bits read from i->seg() at RMAddr(i) */
+    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
+  }
+
+  softfloat_status_word_t fp_status;
+  MXCSR_to_softfloat_status_word(fp_status, MXCSR);
+
+  /* with MXCSR.DAZ set, every input element goes through handleDAZ() first */
+  if (MXCSR.get_DAZ()) {
+    dst.xmm32u(0) = handleDAZ(dst.xmm32u(0));
+    dst.xmm32u(1) = handleDAZ(dst.xmm32u(1));
+    dst.xmm32u(2) = handleDAZ(dst.xmm32u(2));
+    dst.xmm32u(3) = handleDAZ(dst.xmm32u(3));
+
+    src.xmm32u(0) = handleDAZ(src.xmm32u(0));
+    src.xmm32u(1) = handleDAZ(src.xmm32u(1));
+    src.xmm32u(2) = handleDAZ(src.xmm32u(2));
+    src.xmm32u(3) = handleDAZ(src.xmm32u(3));
+  }
+
+  /*
+   * Even result elements are dst - src, odd result elements are dst + src.
+   */
+  out.xmm32u(0) = float32_sub(dst.xmm32u(0), src.xmm32u(0), fp_status);
+  out.xmm32u(1) = float32_add(dst.xmm32u(1), src.xmm32u(1), fp_status);
+  out.xmm32u(2) = float32_sub(dst.xmm32u(2), src.xmm32u(2), fp_status);
+  out.xmm32u(3) = float32_add(dst.xmm32u(3), src.xmm32u(3), fp_status);
+
+  BX_CPU_THIS_PTR check_exceptionsSSE(fp_status.float_exception_flags);
+  BX_WRITE_XMM_REG(i->nnn(), out);
+
+#else
+  BX_INFO(("ADDSUBPS_VpsWps: required PNI, use --enable-pni option"));
+  UndefinedOpcode(i);
+#endif
+}