Implement DPPS/DPPD ops using existing primitives; add some missing definitions

This commit is contained in:
Stanislav Shwartsman 2014-02-02 18:57:25 +00:00
parent ca1b496efc
commit 55e1d53a48
5 changed files with 55 additions and 58 deletions

View File

@ -35,6 +35,7 @@ extern float32 approximate_rcp(float32 op);
#include "fpu/softfloat-compare.h"
#include "simd_pfp.h"
#include "simd_int.h"
void BX_CPU_C::print_state_AVX(void)
{
@ -1095,32 +1096,28 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VDPPS_VpsHpsWpsIbR(bxInstruction_c
for (unsigned n=0; n < len; n++) {
// op1: [A, B, C, D]
// op2: [E, F, G, H]
// after multiplication: op1 = [AE, BF, CG, DH]
xmm_mulps_mask(&op1.ymm128(n), &op2.ymm128(n), status, mask >> 4);
float32 tmp1 = float32_add(op1.ymm32u(n*4+0), op1.ymm32u(n*4+1), status);
float32 tmp2 = float32_add(op1.ymm32u(n*4+2), op1.ymm32u(n*4+3), status);
// shuffle op2 = [BF, AE, DH, CG]
xmm_shufps(&op2.ymm128(n), &op1.ymm128(n), &op1.ymm128(n), 0xb1);
#ifdef BX_DPPS_DPPD_NAN_MATCHING_HARDWARE
float32 r1 = float32_add(tmp1, tmp2, status);
float32 r2 = float32_add(tmp2, tmp1, status);
// op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
xmm_addps(&op2.ymm128(n), &op1.ymm128(n), status);
op1.ymm32u(n*4+0) = (mask & 0x01) ? r1 : 0;
op1.ymm32u(n*4+1) = (mask & 0x02) ? r1 : 0;
op1.ymm32u(n*4+2) = (mask & 0x04) ? r2 : 0;
op1.ymm32u(n*4+3) = (mask & 0x08) ? r2 : 0;
#else
float32 r = float32_add(tmp1, tmp2, status);
// shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
xmm_shufpd(&op1.ymm128(n), &op2.ymm128(n), &op2.ymm128(n), 0x1);
op1.ymm32u(n*4+0) = (mask & 0x01) ? r : 0;
op1.ymm32u(n*4+1) = (mask & 0x02) ? r : 0;
op1.ymm32u(n*4+2) = (mask & 0x04) ? r : 0;
op1.ymm32u(n*4+3) = (mask & 0x08) ? r : 0;
#endif
// op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
xmm_addps_mask(&op2.ymm128(n), &op1.ymm128(n), status, mask);
}
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op1, len);
BX_WRITE_YMM_REGZ_VLEN(i->dst(), op2, len);
BX_NEXT_INSTR(i);
}

View File

@ -435,6 +435,9 @@ struct BxExceptionInfo {
#define BX_MSR_TSC_DEADLINE 0x6E0
/* Intel MPX supervisor bound configuration register */
#define BX_MSR_BNDCFGS 0xd90
#define BX_MSR_MAX_INDEX 0x1000
enum {

View File

@ -2,7 +2,7 @@
// $Id$
/////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2013 Stanislav Shwartsman
// Copyright (c) 2013-2014 Stanislav Shwartsman
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
//
// This library is free software; you can redistribute it and/or
@ -99,7 +99,8 @@ enum {
BX_KMASK_REG = 0xA,
BX_SEGREG = 0xB,
BX_CREG = 0xC,
BX_DREG = 0xD
BX_DREG = 0xD,
BX_BOUNDS_REG = 0xE
};
enum {

View File

@ -1360,7 +1360,7 @@ bx_define_opcode(BX_IA_TEST_EqId, &BX_CPU_C::TEST_EqIdM, &BX_CPU_C::TEST_EqIdR,
bx_define_opcode(BX_IA_CMP_EqId, &BX_CPU_C::CMP_EqIdM, &BX_CPU_C::CMP_EqIdR, 0, OP_Eq, OP_sId, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_XCHG_EqGq, &BX_CPU_C::XCHG_EqGqM, &BX_CPU_C::XCHG_EqGqR, 0, OP_Eq, OP_Gq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_LEA_GqM, &BX_CPU_C::LEA_GqM, &BX_CPU_C::BxError, 0, OP_Gq, OP_Mq, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_LEA_GqM, &BX_CPU_C::LEA_GqM, &BX_CPU_C::BxError, 0, OP_Gq, OP_M, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_Op64_GdEd, &BX_CPU_C::MOV64_GdEdM, &BX_CPU_C::MOV_GdEdR, 0, OP_Gd, OP_Ed, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_Op64_EdGd, &BX_CPU_C::MOV64_EdGdM, &BX_CPU_C::MOV_GdEdR, 0, OP_Ed, OP_Gd, OP_NONE, OP_NONE, 0)
bx_define_opcode(BX_IA_MOV_GqEq, &BX_CPU_C::MOV_GqEqM, &BX_CPU_C::MOV_GqEqR, 0, OP_Gq, OP_Eq, OP_NONE, OP_NONE, 0)

View File

@ -31,6 +31,7 @@
#include "fpu/softfloat-compare.h"
#include "simd_pfp.h"
#include "simd_int.h"
void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
{
@ -1833,32 +1834,28 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPS_VpsWpsIbR(bxInstruction_c *i)
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
// op1: [A, B, C, D]
// op2: [E, F, G, H]
// after multiplication: op1 = [AE, BF, CG, DH]
xmm_mulps_mask(&op1, &op2, status, mask >> 4);
float32 tmp1 = float32_add(op1.xmm32u(0), op1.xmm32u(1), status);
float32 tmp2 = float32_add(op1.xmm32u(2), op1.xmm32u(3), status);
op1.clear();
#ifdef BX_DPPS_DPPD_NAN_MATCHING_HARDWARE
float32 r1 = float32_add(tmp1, tmp2, status);
float32 r2 = float32_add(tmp2, tmp1, status);
if (mask & 0x01) op1.xmm32u(0) = r1;
if (mask & 0x02) op1.xmm32u(1) = r1;
if (mask & 0x04) op1.xmm32u(2) = r2;
if (mask & 0x08) op1.xmm32u(3) = r2;
#else
float32 r = float32_add(tmp1, tmp2, status);
if (mask & 0x01) op1.xmm32u(0) = r;
if (mask & 0x02) op1.xmm32u(1) = r;
if (mask & 0x04) op1.xmm32u(2) = r;
if (mask & 0x08) op1.xmm32u(3) = r;
#endif
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG(i->dst(), op1);
// shuffle op2 = [BF, AE, DH, CG]
xmm_shufps(&op2, &op1, &op1, 0xb1);
// op2 = [(BF+AE), (AE+BF), (DH+CG), (CG+DH)]
xmm_addps(&op2, &op1, status);
check_exceptionsSSE(get_exception_flags(status));
// shuffle op1 = [(DH+CG), (CG+DH), (BF+AE), (AE+BF)]
xmm_shufpd(&op1, &op2, &op2, 0x1);
// op2 = [(BF+AE)+(DH+CG), (AE+BF)+(CG+DH), (DH+CG)+(BF+AE), (CG+DH)+(AE+BF)]
xmm_addps_mask(&op2, &op1, status, mask);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REG(i->dst(), op2);
BX_NEXT_INSTR(i);
}
@ -1877,22 +1874,21 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::DPPD_VpdHpdWpdIbR(bxInstruction_c
float_status_t status;
mxcsr_to_softfloat_status_word(status, MXCSR);
// op1: [A, B]
// op2: [C, D]
// after multiplication: op1 = [AC, BD]
xmm_mulpd_mask(&op1, &op2, status, mask >> 4);
op2.clear();
#ifdef BX_DPPS_DPPD_NAN_MATCHING_HARDWARE
if (mask & 0x01) op2.xmm64u(0) = float64_add(op1.xmm64u(0), op1.xmm64u(1), status);
if (mask & 0x02) op2.xmm64u(1) = float64_add(op1.xmm64u(1), op1.xmm64u(0), status);
#else
float64 result = float64_add(op1.xmm64u(0), op1.xmm64u(1), status);
if (mask & 0x01) op2.xmm64u(0) = result;
if (mask & 0x02) op2.xmm64u(1) = result;
#endif
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REGZ(i->dst(), op2, i->getVL());
// shuffle op2 = [BD, AC]
xmm_shufpd(&op2, &op1, &op1, 0x1);
// op1 = [AC+BD, BD+AC]
xmm_addpd_mask(&op1, &op2, status, mask);
check_exceptionsSSE(get_exception_flags(status));
BX_WRITE_XMM_REGZ(i->dst(), op1, i->getVL());
BX_NEXT_INSTR(i);
}