code cleanups
This commit is contained in:
parent
bace4d0c6b
commit
72c710947c
@ -549,4 +549,8 @@ BX_INSF_TYPE BX_CPP_AttrRegparmN(1) BX_CPU_C::VMOVSS_MASK_VssHpsWssR(bxInstructi
|
|||||||
BX_NEXT_INSTR(i);
|
BX_NEXT_INSTR(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////
|
||||||
|
// masked store with down conversion //
|
||||||
|
///////////////////////////////////////
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
// $Id$
|
// $Id$
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
//
|
//
|
||||||
// Copyright (c) 2011-2013 Stanislav Shwartsman
|
// Copyright (c) 2011-2014 Stanislav Shwartsman
|
||||||
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
// Written by Stanislav Shwartsman [sshwarts at sourceforge net]
|
||||||
//
|
//
|
||||||
// This library is free software; you can redistribute it and/or
|
// This library is free software; you can redistribute it and/or
|
||||||
@ -490,68 +490,132 @@ BX_CPP_INLINE Bit32u xmm_pmovmskq(const BxPackedXmmRegister *op)
|
|||||||
|
|
||||||
// blend
|
// blend
|
||||||
|
|
||||||
// Mask-controlled byte blend over 16 byte lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement bytes
//   mask - bit n selects lane n (only bits 0..15 are examined)
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void xmm_pblendb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, Bit32u mask)
{
  for (unsigned lane = 0; lane < 16; lane++) {
    if ((mask >> lane) & 0x1) {
      op1->xmmubyte(lane) = op2->xmmubyte(lane);
    }
  }
}
|
||||||
|
|
||||||
#if BX_SUPPORT_EVEX
|
// Byte blend with zeroing over 16 byte lanes.
//   dst  - destination; every lane is written
//   op   - source of the bytes that are kept
//   mask - bit n selects lane n (only bits 0..15 are examined)
// Lane n receives op's byte when bit n of mask is set, otherwise zero.
BX_CPP_INLINE void xmm_zero_pblendb(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, Bit32u mask)
{
  for (unsigned lane = 0; lane < 16; lane++) {
    if ((mask >> lane) & 0x1)
      dst->xmmubyte(lane) = op->xmmubyte(lane);
    else
      dst->xmmubyte(lane) = 0;
  }
}
|
||||||
|
|
||||||
|
#if BX_SUPPORT_EVEX
|
||||||
|
// Mask-controlled byte blend over the first 'len' byte lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement bytes
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of byte lanes to process
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void simd_pblendb(BxPackedAvxRegister *op1, const BxPackedAvxRegister *op2, Bit64u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1) {
      op1->vmmubyte(idx) = op2->vmmubyte(idx);
    }
  }
}
|
||||||
|
|
||||||
|
// Byte blend with zeroing over the first 'len' byte lanes.
//   dst  - destination; each of the first 'len' lanes is written
//   op   - source of the bytes that are kept
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of byte lanes to process
// Lane n receives op's byte when its mask bit is set, otherwise zero.
BX_CPP_INLINE void simd_zero_pblendb(BxPackedAvxRegister *dst, const BxPackedAvxRegister *op, Bit64u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1)
      dst->vmmubyte(idx) = op->vmmubyte(idx);
    else
      dst->vmmubyte(idx) = 0;
  }
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Mask-controlled 16-bit word blend over 8 word lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement words
//   mask - bit n selects lane n (only bits 0..7 are examined)
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void xmm_pblendw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, Bit32u mask)
{
  for (unsigned lane = 0; lane < 8; lane++) {
    if ((mask >> lane) & 0x1) {
      op1->xmm16u(lane) = op2->xmm16u(lane);
    }
  }
}
|
||||||
|
|
||||||
#if BX_SUPPORT_EVEX
|
// 16-bit word blend with zeroing over 8 word lanes.
//   dst  - destination; every lane is written
//   op   - source of the words that are kept
//   mask - bit n selects lane n (only bits 0..7 are examined)
// Lane n receives op's word when bit n of mask is set, otherwise zero.
BX_CPP_INLINE void xmm_zero_pblendw(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, Bit32u mask)
{
  for (unsigned lane = 0; lane < 8; lane++) {
    if ((mask >> lane) & 0x1)
      dst->xmm16u(lane) = op->xmm16u(lane);
    else
      dst->xmm16u(lane) = 0;
  }
}
|
||||||
|
|
||||||
|
#if BX_SUPPORT_EVEX
|
||||||
|
// Mask-controlled 16-bit word blend over the first 'len' word lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement words
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of word lanes to process
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void simd_pblendw(BxPackedAvxRegister *op1, const BxPackedAvxRegister *op2, Bit32u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1) {
      op1->vmm16u(idx) = op2->vmm16u(idx);
    }
  }
}
|
||||||
|
|
||||||
|
// 16-bit word blend with zeroing over the first 'len' word lanes.
//   dst  - destination; each of the first 'len' lanes is written
//   op   - source of the words that are kept
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of word lanes to process
// Lane n receives op's word when its mask bit is set, otherwise zero.
BX_CPP_INLINE void simd_zero_pblendw(BxPackedAvxRegister *dst, const BxPackedAvxRegister *op, Bit32u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1)
      dst->vmm16u(idx) = op->vmm16u(idx);
    else
      dst->vmm16u(idx) = 0;
  }
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Mask-controlled 32-bit element blend over 4 lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement elements
//   mask - bit n selects lane n (only bits 0..3 are examined)
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void xmm_blendps(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, Bit32u mask)
{
  for (unsigned lane = 0; lane < 4; lane++) {
    if ((mask >> lane) & 0x1) {
      op1->xmm32u(lane) = op2->xmm32u(lane);
    }
  }
}
|
||||||
|
|
||||||
#if BX_SUPPORT_EVEX
|
// 32-bit element blend with zeroing over 4 lanes.
//   dst  - destination; every lane is written
//   op   - source of the elements that are kept
//   mask - bit n selects lane n (only bits 0..3 are examined)
// Lane n receives op's element when bit n of mask is set, otherwise zero.
BX_CPP_INLINE void xmm_zero_blendps(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, Bit32u mask)
{
  for (unsigned lane = 0; lane < 4; lane++) {
    if ((mask >> lane) & 0x1)
      dst->xmm32u(lane) = op->xmm32u(lane);
    else
      dst->xmm32u(lane) = 0;
  }
}
|
||||||
|
|
||||||
|
#if BX_SUPPORT_EVEX
|
||||||
|
// Mask-controlled 32-bit element blend over the first 'len' lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement elements
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of 32-bit lanes to process
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void simd_blendps(BxPackedAvxRegister *op1, const BxPackedAvxRegister *op2, Bit32u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1) {
      op1->vmm32u(idx) = op2->vmm32u(idx);
    }
  }
}
|
||||||
|
|
||||||
|
// 32-bit element blend with zeroing over the first 'len' lanes.
//   dst  - destination; each of the first 'len' lanes is written
//   op   - source of the elements that are kept
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of 32-bit lanes to process
// Lane n receives op's element when its mask bit is set, otherwise zero.
BX_CPP_INLINE void simd_zero_blendps(BxPackedAvxRegister *dst, const BxPackedAvxRegister *op, Bit32u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1)
      dst->vmm32u(idx) = op->vmm32u(idx);
    else
      dst->vmm32u(idx) = 0;
  }
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Mask-controlled 64-bit element blend over 2 lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement elements
//   mask - bit n selects lane n (only bits 0..1 are examined)
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void xmm_blendpd(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, Bit32u mask)
{
  for (unsigned lane = 0; lane < 2; lane++) {
    if ((mask >> lane) & 0x1) {
      op1->xmm64u(lane) = op2->xmm64u(lane);
    }
  }
}
|
||||||
|
|
||||||
#if BX_SUPPORT_EVEX
|
// 64-bit element blend with zeroing over 2 lanes.
//   dst  - destination; every lane is written
//   op   - source of the elements that are kept
//   mask - bit n selects lane n (only bits 0..1 are examined)
// Lane n receives op's element when bit n of mask is set, otherwise zero.
BX_CPP_INLINE void xmm_zero_blendpd(BxPackedXmmRegister *dst, const BxPackedXmmRegister *op, Bit32u mask)
{
  for (unsigned lane = 0; lane < 2; lane++) {
    if ((mask >> lane) & 0x1)
      dst->xmm64u(lane) = op->xmm64u(lane);
    else
      dst->xmm64u(lane) = 0;
  }
}
|
||||||
|
|
||||||
|
#if BX_SUPPORT_EVEX
|
||||||
|
// Mask-controlled 64-bit element blend over the first 'len' lanes.
//   op1  - destination; lane n is overwritten only when bit n of mask is set
//   op2  - source of the replacement elements
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of 64-bit lanes to process
// Lanes whose mask bit is clear keep their current op1 value.
BX_CPP_INLINE void simd_blendpd(BxPackedAvxRegister *op1, const BxPackedAvxRegister *op2, Bit32u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1) {
      op1->vmm64u(idx) = op2->vmm64u(idx);
    }
  }
}
|
||||||
|
|
||||||
|
// 64-bit element blend with zeroing over the first 'len' lanes.
//   dst  - destination; each of the first 'len' lanes is written
//   op   - source of the elements that are kept
//   mask - consumed one bit per lane, starting from bit 0
//   len  - number of 64-bit lanes to process
// Lane n receives op's element when its mask bit is set, otherwise zero.
BX_CPP_INLINE void simd_zero_blendpd(BxPackedAvxRegister *dst, const BxPackedAvxRegister *op, Bit32u mask, unsigned len)
{
  for (unsigned idx = 0; idx < len; idx++, mask >>= 1) {
    if (mask & 0x1)
      dst->vmm64u(idx) = op->vmm64u(idx);
    else
      dst->vmm64u(idx) = 0;
  }
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
BX_CPP_INLINE void xmm_pblendvb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *mask)
|
BX_CPP_INLINE void xmm_pblendvb(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *mask)
|
||||||
@ -561,6 +625,13 @@ BX_CPP_INLINE void xmm_pblendvb(BxPackedXmmRegister *op1, const BxPackedXmmRegis
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Variable 16-bit word blend over 8 word lanes, controlled by a register mask.
//   op1  - destination; lane n is overwritten only when selected
//   op2  - source of the replacement words
//   mask - lane n is selected when mask's word n, read through xmm16s(),
//          is negative (presumably: its most-significant bit is set)
// Unselected lanes keep their current op1 value.
BX_CPP_INLINE void xmm_pblendvw(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *mask)
{
  for (unsigned lane = 0; lane < 8; lane++) {
    const bool take_op2 = (mask->xmm16s(lane) < 0);
    if (take_op2) {
      op1->xmm16u(lane) = op2->xmm16u(lane);
    }
  }
}
|
||||||
|
|
||||||
BX_CPP_INLINE void xmm_blendvps(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *mask)
|
BX_CPP_INLINE void xmm_blendvps(BxPackedXmmRegister *op1, const BxPackedXmmRegister *op2, const BxPackedXmmRegister *mask)
|
||||||
{
|
{
|
||||||
for(unsigned n=0; n<4; n++) {
|
for(unsigned n=0; n<4; n++) {
|
||||||
@ -921,28 +992,28 @@ BX_CPP_INLINE void xmm_pbroadcastq(BxPackedXmmRegister *op, Bit64u val_64)
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if BX_SUPPORT_EVEX
|
#if BX_SUPPORT_EVEX
|
||||||
// Broadcast an 8-bit value into the first 'len' byte lanes of op.
//   op    - destination register
//   val_8 - value stored into every processed lane
//   len   - number of byte lanes to fill
BX_CPP_INLINE void simd_pbroadcastb(BxPackedAvxRegister *op, Bit8u val_8, unsigned len)
{
  unsigned idx = 0;
  while (idx < len) {
    op->vmmubyte(idx) = val_8;
    ++idx;
  }
}
|
||||||
|
|
||||||
// Broadcast a 16-bit value into the first 'len' word lanes of op.
//   op     - destination register
//   val_16 - value stored into every processed lane
//   len    - number of 16-bit lanes to fill
BX_CPP_INLINE void simd_pbroadcastw(BxPackedAvxRegister *op, Bit16u val_16, unsigned len)
{
  unsigned idx = 0;
  while (idx < len) {
    op->vmm16u(idx) = val_16;
    ++idx;
  }
}
|
||||||
|
|
||||||
// Broadcast a 32-bit value into the first 'len' dword lanes of op.
//   op     - destination register
//   val_32 - value stored into every processed lane
//   len    - number of 32-bit lanes to fill
BX_CPP_INLINE void simd_pbroadcastd(BxPackedAvxRegister *op, Bit32u val_32, unsigned len)
{
  unsigned idx = 0;
  while (idx < len) {
    op->vmm32u(idx) = val_32;
    ++idx;
  }
}
|
||||||
|
|
||||||
BX_CPP_INLINE void simd_pbroadcastq(BxPackedZmmRegister *op, Bit64u val_64, unsigned len)
|
BX_CPP_INLINE void simd_pbroadcastq(BxPackedAvxRegister *op, Bit64u val_64, unsigned len)
|
||||||
{
|
{
|
||||||
for(unsigned n=0; n < len; n++) {
|
for(unsigned n=0; n < len; n++) {
|
||||||
op->vmm64u(n) = val_64;
|
op->vmm64u(n) = val_64;
|
||||||
|
Loading…
Reference in New Issue
Block a user