implement DPPS/DPPD ops using existing primitives; added some missing defs
This commit is contained in:
parent
ca1b496efc
commit
55e1d53a48
@ -35,6 +35,7 @@ extern float32 approximate_rcp(float32 op);
|
||||
|
||||
#include "fpu/softfloat-compare.h"
|
||||
#include "simd_pfp.h"
|
||||
#include "simd_int.h"
|
||||
|
||||
void BX_CPU_C::print_state_AVX(void)
|
||||
{
|
||||
@ -1095,32 +1096,28 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDPPS_VpsHpsWpsIbR(bxInstruction_c
|
||||
|
||||
for (unsigned n=0; n < len; n++) {
|
||||
|
||||
// op1: [A, B, C, D]
|
||||
// op2: [E, F, G, H]
|
||||
|
||||
// after multiplication: op1 = [AE, BF, CG, DH]
|
||||
xmm_mulps_mask(&op1.ymm128(n), &op2.ymm128(n), status, mask >> 4);
|
||||
|
||||
float32 tmp1 = float32_add(op1.ymm32u(n*4+0), op1.ymm32u(n*4+1), status);
|
||||
float32 tmp2 = float32_add(op1.ymm32u(n*4+2), op1.ymm32u(n*4+3), status);
|
||||
// shuffle op2 = [BF, AE, DH, CG]
|
||||
xmm_shufps(&op2.ymm128(n), &op1.ymm128(n), &op1.ymm128(n), 0xb1);
|
||||
|
||||
#ifdef BX_DPPS_DPPD_NAN_MATCHING_HARDWARE
|
||||
float32 r1 = float32_add(tmp1, tmp2, status);
|
||||
float32 r2 = float32_add(tmp2, tmp1, status);
|
||||
// op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
|
||||
xmm_addps(&op2.ymm128(n), &op1.ymm128(n), status);
|
||||
|
||||
op1.ymm32u(n*4+0) = (mask & 0x01) ? r1 : 0;
|
||||
op1.ymm32u(n*4+1) = (mask & 0x02) ? r1 : 0;
|
||||
op1.ymm32u(n*4+2) = (mask & 0x04) ? r2 : 0;
|
||||
op1.ymm32u(n*4+3) = (mask & 0x08) ? r2 : 0;
|
||||
#else
|
||||
float32 r = float32_add(tmp1, tmp2, status);
|
||||
// shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
|
||||
xmm_shufpd(&op1.ymm128(n), &op2.ymm128(n), &op2.ymm128(n), 0x1);
|
||||
|
||||
op1.ymm32u(n*4+0) = (mask & 0x01) ? r : 0;
|
||||
op1.ymm32u(n*4+1) = (mask & 0x02) ? r : 0;
|
||||
op1.ymm32u(n*4+2) = (mask & 0x04) ? r : 0;
|
||||
op1.ymm32u(n*4+3) = (mask & 0x08) ? r : 0;
|
||||
#endif
|
||||
// op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
|
||||
xmm_addps_mask(&op2.ymm128(n), &op1.ymm128(n), status, mask);
|
||||
}
|
||||
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
|
||||
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op2, len);
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
@ -435,6 +435,9 @@ struct BxExceptionInfo {
|
||||
|
||||
#define BX_MSR_TSC_DEADLINE 0x6E0
|
||||
|
||||
/* Intel MPX supervisor bound configuration register */
|
||||
#define BX_MSR_BNDCFGS 0xd90
|
||||
|
||||
#define BX_MSR_MAX_INDEX 0x1000
|
||||
|
||||
enum {
|
||||
|
@ -2,7 +2,7 @@
|
||||
// $Id$
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Copyright (c) 2013 Stanislav Shwartsman
|
||||
// Copyright (c) 2013-2014 Stanislav Shwartsman
|
||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or
|
||||
@ -99,7 +99,8 @@ enum {
|
||||
BX_KMASK_REG = 0xA,
|
||||
BX_SEGREG = 0xB,
|
||||
BX_CREG = 0xC,
|
||||
BX_DREG = 0xD
|
||||
BX_DREG = 0xD,
|
||||
BX_BOUNDS_REG = 0xE
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@ -1360,7 +1360,7 @@ bx_define_opcode(BX_IA_TEST_EqId, &BX_CPU_C::TEST_EqIdM, &BX_CPU_C::TEST_EqIdR,
|
||||
bx_define_opcode(BX_IA_CMP_EqId, &BX_CPU_C::CMP_EqIdM, &BX_CPU_C::CMP_EqIdR, 0, OP_Eq, OP_sId, OP_NONE, OP_NONE, 0)
|
||||
|
||||
bx_define_opcode(BX_IA_XCHG_EqGq, &BX_CPU_C::XCHG_EqGqM, &BX_CPU_C::XCHG_EqGqR, 0, OP_Eq, OP_Gq, OP_NONE, OP_NONE, 0)
|
||||
bx_define_opcode(BX_IA_LEA_GqM, &BX_CPU_C::LEA_GqM, &BX_CPU_C::BxError, 0, OP_Gq, OP_Mq, OP_NONE, OP_NONE, 0)
|
||||
bx_define_opcode(BX_IA_LEA_GqM, &BX_CPU_C::LEA_GqM, &BX_CPU_C::BxError, 0, OP_Gq, OP_M, OP_NONE, OP_NONE, 0)
|
||||
bx_define_opcode(BX_IA_MOV_Op64_GdEd, &BX_CPU_C::MOV64_GdEdM, &BX_CPU_C::MOV_GdEdR, 0, OP_Gd, OP_Ed, OP_NONE, OP_NONE, 0)
|
||||
bx_define_opcode(BX_IA_MOV_Op64_EdGd, &BX_CPU_C::MOV64_EdGdM, &BX_CPU_C::MOV_GdEdR, 0, OP_Ed, OP_Gd, OP_NONE, OP_NONE, 0)
|
||||
bx_define_opcode(BX_IA_MOV_GqEq, &BX_CPU_C::MOV_GqEqM, &BX_CPU_C::MOV_GqEqR, 0, OP_Gq, OP_Eq, OP_NONE, OP_NONE, 0)
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include "fpu/softfloat-compare.h"
|
||||
|
||||
#include "simd_pfp.h"
|
||||
#include "simd_int.h"
|
||||
|
||||
void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
|
||||
{
|
||||
@ -1833,32 +1834,28 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIbR(bxInstruction_c *i)
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
|
||||
// op1: [A, B, C, D]
|
||||
// op2: [E, F, G, H]
|
||||
|
||||
// after multiplication: op1 = [AE, BF, CG, DH]
|
||||
xmm_mulps_mask(&op1, &op2, status, mask >> 4);
|
||||
|
||||
float32 tmp1 = float32_add(op1.xmm32u(0), op1.xmm32u(1), status);
|
||||
float32 tmp2 = float32_add(op1.xmm32u(2), op1.xmm32u(3), status);
|
||||
|
||||
op1.clear();
|
||||
|
||||
#ifdef BX_DPPS_DPPD_NAN_MATCHING_HARDWARE
|
||||
float32 r1 = float32_add(tmp1, tmp2, status);
|
||||
float32 r2 = float32_add(tmp2, tmp1, status);
|
||||
|
||||
if (mask & 0x01) op1.xmm32u(0) = r1;
|
||||
if (mask & 0x02) op1.xmm32u(1) = r1;
|
||||
if (mask & 0x04) op1.xmm32u(2) = r2;
|
||||
if (mask & 0x08) op1.xmm32u(3) = r2;
|
||||
#else
|
||||
float32 r = float32_add(tmp1, tmp2, status);
|
||||
|
||||
if (mask & 0x01) op1.xmm32u(0) = r;
|
||||
if (mask & 0x02) op1.xmm32u(1) = r;
|
||||
if (mask & 0x04) op1.xmm32u(2) = r;
|
||||
if (mask & 0x08) op1.xmm32u(3) = r;
|
||||
#endif
|
||||
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
BX_WRITE_XMM_REG(i->dst(), op1);
|
||||
|
||||
// shuffle op2 = [BF, AE, DH, CG]
|
||||
xmm_shufps(&op2, &op1, &op1, 0xb1);
|
||||
|
||||
// op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
|
||||
xmm_addps(&op2, &op1, status);
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
// shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
|
||||
xmm_shufpd(&op1, &op2, &op2, 0x1);
|
||||
|
||||
// op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
|
||||
xmm_addps_mask(&op2, &op1, status, mask);
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
BX_WRITE_XMM_REG(i->dst(), op2);
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
@ -1877,22 +1874,21 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdHpdWpdIbR(bxInstruction_c
|
||||
float_status_t status;
|
||||
mxcsr_to_softfloat_status_word(status, MXCSR);
|
||||
|
||||
// op1: [A, B]
|
||||
// op2: [C, D]
|
||||
|
||||
// after multiplication: op1 = [AC, BD]
|
||||
xmm_mulpd_mask(&op1, &op2, status, mask >> 4);
|
||||
|
||||
op2.clear();
|
||||
|
||||
#ifdef BX_DPPS_DPPD_NAN_MATCHING_HARDWARE
|
||||
if (mask & 0x01) op2.xmm64u(0) = float64_add(op1.xmm64u(0), op1.xmm64u(1), status);
|
||||
if (mask & 0x02) op2.xmm64u(1) = float64_add(op1.xmm64u(1), op1.xmm64u(0), status);
|
||||
#else
|
||||
float64 result = float64_add(op1.xmm64u(0), op1.xmm64u(1), status);
|
||||
|
||||
if (mask & 0x01) op2.xmm64u(0) = result;
|
||||
if (mask & 0x02) op2.xmm64u(1) = result;
|
||||
#endif
|
||||
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
BX_WRITE_XMM_REGZ(i->dst(), op2, i->getVL());
|
||||
|
||||
// shuffle op2 = [BD, AC]
|
||||
xmm_shufpd(&op2, &op1, &op1, 0x1);
|
||||
|
||||
// op1 = [AC+BD, BD+AC]
|
||||
xmm_addpd_mask(&op1, &op2, status, mask);
|
||||
check_exceptionsSSE(get_exception_flags(status));
|
||||
|
||||
BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
|
||||
|
||||
BX_NEXT_INSTR(i);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user