Masked load-store optimization for AVX-512

This commit is contained in:
Stanislav Shwartsman 2015-01-26 20:52:03 +00:00
parent ee3841ef07
commit 17c89d1c78
2 changed files with 35 additions and 13 deletions

View File

@ -109,7 +109,7 @@ BX_CPU_C::write_virtual_checks(bx_segment_reg_t *seg, Bit32u offset, unsigned le
if (offset <= seg->cache.u.segment.limit_scaled || if (offset <= seg->cache.u.segment.limit_scaled ||
offset > upper_limit || (upper_limit - offset) < length) offset > upper_limit || (upper_limit - offset) < length)
{ {
BX_ERROR(("write_virtual_checks(): write beyond limit, r/w ED")); BX_ERROR(("write_virtual_checks(): write beyond limit, r/w expand down"));
return 0; return 0;
} }
break; break;
@ -148,8 +148,6 @@ BX_CPU_C::read_virtual_checks(bx_segment_reg_t *seg, Bit32u offset, unsigned len
switch (seg->cache.type) { switch (seg->cache.type) {
case 0: case 1: /* read only */ case 0: case 1: /* read only */
case 2: case 3: /* read/write */ case 2: case 3: /* read/write */
case 10: case 11: /* execute/read */
case 14: case 15: /* execute/read-only, conforming */
if (offset > (seg->cache.u.segment.limit_scaled - length) if (offset > (seg->cache.u.segment.limit_scaled - length)
|| length > seg->cache.u.segment.limit_scaled) || length > seg->cache.u.segment.limit_scaled)
{ {
@ -167,6 +165,22 @@ BX_CPU_C::read_virtual_checks(bx_segment_reg_t *seg, Bit32u offset, unsigned len
} }
break; break;
case 10: case 11: /* execute/read */
case 14: case 15: /* execute/read-only, conforming */
if (offset > (seg->cache.u.segment.limit_scaled - length)
|| length > seg->cache.u.segment.limit_scaled)
{
BX_ERROR(("read_virtual_checks(): read beyond limit"));
return 0;
}
if (seg->cache.u.segment.limit_scaled >= (BX_MAX_MEM_ACCESS_LENGTH-1)) {
// Mark cache as being OK type for succeeding reads. See notes for
// write checks; similar code.
seg->cache.valid |= SegAccessROK;
}
break;
case 4: case 5: /* read only, expand down */ case 4: case 5: /* read only, expand down */
case 6: case 7: /* read/write, expand down */ case 6: case 7: /* read/write, expand down */
if (seg->cache.u.segment.d_b) if (seg->cache.u.segment.d_b)
@ -176,7 +190,7 @@ BX_CPU_C::read_virtual_checks(bx_segment_reg_t *seg, Bit32u offset, unsigned len
if (offset <= seg->cache.u.segment.limit_scaled || if (offset <= seg->cache.u.segment.limit_scaled ||
offset > upper_limit || (upper_limit - offset) < length) offset > upper_limit || (upper_limit - offset) < length)
{ {
BX_ERROR(("read_virtual_checks(): read beyond limit ED")); BX_ERROR(("read_virtual_checks(): read beyond limit expand down"));
return 0; return 0;
} }
break; break;
@ -252,7 +266,7 @@ BX_CPU_C::execute_virtual_checks(bx_segment_reg_t *seg, Bit32u offset, unsigned
if (offset <= seg->cache.u.segment.limit_scaled || if (offset <= seg->cache.u.segment.limit_scaled ||
offset > upper_limit || (upper_limit - offset) < length) offset > upper_limit || (upper_limit - offset) < length)
{ {
BX_ERROR(("execute_virtual_checks(): read beyond limit ED")); BX_ERROR(("execute_virtual_checks(): read beyond limit expand down"));
return 0; return 0;
} }
break; break;

View File

@ -32,9 +32,10 @@ void BX_CPU_C::avx_masked_load8(bxInstruction_c *i, bx_address eaddr, BxPackedAv
unsigned len = i->getVL(); unsigned len = i->getVL();
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) { for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
if (mask & (BX_CONST64(1)<<n)) { if (mask & (BX_CONST64(1)<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + n))) if (! IsCanonical(laddr + n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }
@ -53,9 +54,10 @@ void BX_CPU_C::avx_masked_load16(bxInstruction_c *i, bx_address eaddr, BxPackedA
unsigned len = i->getVL(); unsigned len = i->getVL();
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < WORD_ELEMENTS(len); n++) { for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
if (mask & (1<<n)) { if (mask & (1<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + 2*n))) if (! IsCanonical(laddr + 2*n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }
@ -83,9 +85,10 @@ void BX_CPU_C::avx_masked_load32(bxInstruction_c *i, bx_address eaddr, BxPackedA
unsigned len = i->getVL(); unsigned len = i->getVL();
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
if (mask & (1<<n)) { if (mask & (1<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + 4*n))) if (! IsCanonical(laddr + 4*n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }
@ -113,9 +116,10 @@ void BX_CPU_C::avx_masked_load64(bxInstruction_c *i, bx_address eaddr, BxPackedA
unsigned len = i->getVL(); unsigned len = i->getVL();
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
if (mask & (1<<n)) { if (mask & (1<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + 8*n))) if (! IsCanonical(laddr + 8*n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }
@ -144,9 +148,10 @@ void BX_CPU_C::avx_masked_store8(bxInstruction_c *i, bx_address eaddr, const BxP
#if BX_SUPPORT_X86_64 #if BX_SUPPORT_X86_64
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) { for (unsigned n=0; n < BYTE_ELEMENTS(len); n++) {
if (mask & (BX_CONST64(1)<<n)) { if (mask & (BX_CONST64(1)<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + n))) if (! IsCanonical(laddr + n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }
@ -171,9 +176,10 @@ void BX_CPU_C::avx_masked_store16(bxInstruction_c *i, bx_address eaddr, const Bx
#if BX_SUPPORT_X86_64 #if BX_SUPPORT_X86_64
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < WORD_ELEMENTS(len); n++) { for (unsigned n=0; n < WORD_ELEMENTS(len); n++) {
if (mask & (1<<n)) { if (mask & (1<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + 2*n))) if (! IsCanonical(laddr + 2*n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }
@ -207,9 +213,10 @@ void BX_CPU_C::avx_masked_store32(bxInstruction_c *i, bx_address eaddr, const Bx
#if BX_SUPPORT_X86_64 #if BX_SUPPORT_X86_64
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) { for (unsigned n=0; n < DWORD_ELEMENTS(len); n++) {
if (mask & (1<<n)) { if (mask & (1<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + 4*n))) if (! IsCanonical(laddr + 4*n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }
@ -243,9 +250,10 @@ void BX_CPU_C::avx_masked_store64(bxInstruction_c *i, bx_address eaddr, const Bx
#if BX_SUPPORT_X86_64 #if BX_SUPPORT_X86_64
if (i->as64L()) { if (i->as64L()) {
Bit64u laddr = get_laddr64(i->seg(), eaddr);
for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) { for (unsigned n=0; n < QWORD_ELEMENTS(len); n++) {
if (mask & (1<<n)) { if (mask & (1<<n)) {
if (! IsCanonical(get_laddr64(i->seg(), eaddr + 8*n))) if (! IsCanonical(laddr + 8*n))
exception(int_number(i->seg()), 0); exception(int_number(i->seg()), 0);
} }
} }