diff --git a/bochs/cpu/io.cc b/bochs/cpu/io.cc index ce92e3f7d..295c53605 100644 --- a/bochs/cpu/io.cc +++ b/bochs/cpu/io.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: io.cc,v 1.41 2007-10-10 22:20:32 sshwarts Exp $ +// $Id: io.cc,v 1.42 2007-10-29 15:39:18 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -47,8 +47,9 @@ #if BX_SupportRepeatSpeedups Bit32u BX_CPU_C::FastRepINSW(bxInstruction_c *i, bx_address dstOff, Bit16u port, Bit32u wordCount) { - Bit32u paddrDst, wordsFitDst; + Bit32u wordsFitDst; signed int pointerDelta; + Bit8u *hostAddrDst; bx_segment_reg_t *dstSegPtr = &BX_CPU_THIS_PTR sregs[BX_SEG_REG_ES]; @@ -60,32 +61,39 @@ Bit32u BX_CPU_C::FastRepINSW(bxInstruction_c *i, bx_address dstOff, Bit16u port, write_virtual_checks(dstSegPtr, dstOff, 2); bx_address laddrDst = BX_CPU_THIS_PTR get_segment_base(BX_SEG_REG_ES) + dstOff; + // check that the address is word aligned + if (laddrDst & 1) return 0; + +#if BX_SupportGuest2HostTLB + hostAddrDst = v2h_write_byte(laddrDst, CPL==3); +#else + bx_phy_address paddrDst; + if (BX_CPU_THIS_PTR cr0.get_PG()) paddrDst = dtranslate_linear(laddrDst, CPL==3, BX_WRITE); else paddrDst = laddrDst; + // If we want to write directly into the physical memory array, // we need the A20 address. - paddrDst = A20ADDR(paddrDst); + hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrDst), BX_WRITE, DATA_ACCESS); +#endif - Bit8u *hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrDst, BX_WRITE, DATA_ACCESS); - - // Check that native host access was not vetoed for that page, and - // that the address is word aligned. - if (!hostAddrDst || (paddrDst & 1)) return 0; + // Check that native host access was not vetoed for that page + if (!hostAddrDst) return 0; // See how many words can fit in the rest of this page. if (BX_CPU_THIS_PTR get_DF()) { // Counting downward // Note: 1st word must not cross page boundary. - if ((paddrDst & 0xfff) > 0xffe) return 0; - wordsFitDst = (2 + (paddrDst & 0xfff)) >> 1; + if ((laddrDst & 0xfff) > 0xffe) return 0; + wordsFitDst = (2 + (laddrDst & 0xfff)) >> 1; pointerDelta = -2; } else { // Counting upward - wordsFitDst = (0x1000 - (paddrDst & 0xfff)) >> 1; + wordsFitDst = (0x1000 - (laddrDst & 0xfff)) >> 1; pointerDelta = 2; } @@ -166,8 +174,9 @@ Bit32u BX_CPU_C::FastRepINSW(bxInstruction_c *i, bx_address dstOff, Bit16u port, Bit32u BX_CPU_C::FastRepOUTSW(bxInstruction_c *i, unsigned srcSeg, bx_address srcOff, Bit16u port, Bit32u wordCount) { - Bit32u paddrSrc, wordsFitSrc; + Bit32u wordsFitSrc; signed int pointerDelta; + Bit8u *hostAddrSrc; bx_segment_reg_t *srcSegPtr = &BX_CPU_THIS_PTR sregs[srcSeg]; @@ -179,106 +188,113 @@ Bit32u BX_CPU_C::FastRepOUTSW(bxInstruction_c *i, unsigned srcSeg, bx_address sr read_virtual_checks(srcSegPtr, srcOff, 2); bx_address laddrSrc = BX_CPU_THIS_PTR get_segment_base(srcSeg) + srcOff; + // check that the address is word aligned + if (laddrSrc & 1) return 0; + +#if BX_SupportGuest2HostTLB + hostAddrSrc = v2h_read_byte(laddrSrc, CPL==3); +#else + bx_phy_address paddrSrc; + if (BX_CPU_THIS_PTR cr0.get_PG()) paddrSrc = dtranslate_linear(laddrSrc, CPL==3, BX_READ); else paddrSrc = laddrSrc; + // If we want to write directly into the physical memory array, // we need the A20 address. - paddrSrc = A20ADDR(paddrSrc); - - Bit8u *hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrSrc, BX_READ, DATA_ACCESS); - - // Check that native host access was not vetoed for that page, and - // that the address is word aligned. - if (hostAddrSrc && ! (paddrSrc & 1)) { - // See how many words can fit in the rest of this page. - if (BX_CPU_THIS_PTR get_DF()) { - // Counting downward - // Note: 1st word must not cross page boundary. - if ((paddrSrc & 0xfff) > 0xffe) return 0; - wordsFitSrc = (2 + (paddrSrc & 0xfff)) >> 1; - pointerDelta = (unsigned) -2; - } - else { - // Counting upward - wordsFitSrc = (0x1000 - (paddrSrc & 0xfff)) >> 1; - pointerDelta = 2; - } - - // Restrict word count to the number that will fit in this page. - if (wordCount > wordsFitSrc) - wordCount = wordsFitSrc; - - // If after all the restrictions, there is anything left to do... - if (wordCount) { - Bit32u srcSegLimit = srcSegPtr->cache.u.segment.limit_scaled; - unsigned count; - - // For 16-bit addressing mode, clamp the segment limits to 16bits - // so we don't have to worry about computations using si/di - // rolling over 16-bit boundaries. - if (!i->as32L()) { - if (srcSegLimit > 0xffff) - srcSegLimit = 0xffff; - } - - // Before we copy memory, we need to make sure that the segments - // allow the accesses up to the given source and dest offset. If - // the cache.valid bits have SegAccessWOK and ROK, we know that - // the cache is valid for those operations, and that the segments - // are non-expand down (thus we can make a simple limit check). - if ( !(srcSegPtr->cache.valid & SegAccessROK) ) return 0; - - if (BX_CPU_THIS_PTR cpu_mode != BX_MODE_LONG_64) - { - // Now make sure transfer will fit within the constraints of the - // segment boundaries, 0..limit for non expand-down. We know - // wordCount >= 1 here. - if (BX_CPU_THIS_PTR get_DF()) { - // Counting downward - Bit32u minOffset = (wordCount-1) << 1; - if (srcOff < minOffset) return 0; - } - else { - // Counting upward - Bit32u srcMaxOffset = (srcSegLimit - (wordCount<<1)) + 1; - if (srcOff > srcMaxOffset) return 0; - } - } - - for (count=0; count> 8) | (temp16 << 8)), 2); + hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrSrc), BX_READ, DATA_ACCESS); #endif - if (bx_devices.bulkIOQuantumsTransferred) { - hostAddrSrc = bx_devices.bulkIOHostAddr; - count += bx_devices.bulkIOQuantumsTransferred; - } - else { - hostAddrSrc += pointerDelta; - count++; - } - // Terminate early if there was an event. - if (BX_CPU_THIS_PTR async_event) break; - } - // Reset for next non-bulk IO - bx_devices.bulkIOQuantumsRequested = 0; + // Check that native host access was not vetoed for that page + if (!hostAddrSrc) return 0; - return count; + // See how many words can fit in the rest of this page. + if (BX_CPU_THIS_PTR get_DF()) { + // Counting downward + // Note: 1st word must not cross page boundary. + if ((laddrSrc & 0xfff) > 0xffe) return 0; + wordsFitSrc = (2 + (laddrSrc & 0xfff)) >> 1; + pointerDelta = (unsigned) -2; + } + else { + // Counting upward + wordsFitSrc = (0x1000 - (laddrSrc & 0xfff)) >> 1; + pointerDelta = 2; + } + + // Restrict word count to the number that will fit in this page. + if (wordCount > wordsFitSrc) + wordCount = wordsFitSrc; + + // If after all the restrictions, there is anything left to do... + if (wordCount) { + Bit32u srcSegLimit = srcSegPtr->cache.u.segment.limit_scaled; + unsigned count; + + // For 16-bit addressing mode, clamp the segment limits to 16bits + // so we don't have to worry about computations using si/di + // rolling over 16-bit boundaries. + if (!i->as32L()) { + if (srcSegLimit > 0xffff) + srcSegLimit = 0xffff; } + + // Before we copy memory, we need to make sure that the segments + // allow the accesses up to the given source and dest offset. If + // the cache.valid bits have SegAccessWOK and ROK, we know that + // the cache is valid for those operations, and that the segments + // are non-expand down (thus we can make a simple limit check). + if ( !(srcSegPtr->cache.valid & SegAccessROK) ) return 0; + + if (BX_CPU_THIS_PTR cpu_mode != BX_MODE_LONG_64) + { + // Now make sure transfer will fit within the constraints of the + // segment boundaries, 0..limit for non expand-down. We know + // wordCount >= 1 here. + if (BX_CPU_THIS_PTR get_DF()) { + // Counting downward + Bit32u minOffset = (wordCount-1) << 1; + if (srcOff < minOffset) return 0; + } + else { + // Counting upward + Bit32u srcMaxOffset = (srcSegLimit - (wordCount<<1)) + 1; + if (srcOff > srcMaxOffset) return 0; + } + } + + for (count=0; count> 8) | (temp16 << 8)), 2); +#endif + if (bx_devices.bulkIOQuantumsTransferred) { + hostAddrSrc = bx_devices.bulkIOHostAddr; + count += bx_devices.bulkIOQuantumsTransferred; + } + else { + hostAddrSrc += pointerDelta; + count++; + } + // Terminate early if there was an event. + if (BX_CPU_THIS_PTR async_event) break; + } + + // Reset for next non-bulk IO + bx_devices.bulkIOQuantumsRequested = 0; + + return count; } return 0; @@ -425,7 +441,7 @@ void BX_CPU_C::INSW_YwDX(bxInstruction_c *i) if (i->as32L()) RCX = ECX - (wordCount-1); else - CX -= (wordCount-1); + CX -= (wordCount-1); incr = wordCount << 1; // count * 2. goto doIncr; @@ -490,7 +506,7 @@ void BX_CPU_C::INSD_YdDX(bxInstruction_c *i) if (i->as32L()) edi = EDI; else - edi = DI; + edi = DI; Bit32u value32=0; @@ -586,9 +602,9 @@ void BX_CPU_C::OUTSB_DXXb(bxInstruction_c *i) #endif if (i->as32L()) { if (BX_CPU_THIS_PTR get_DF()) - RSI--; + RSI = ESI-1; else - RSI++; + RSI = ESI+1; } else { if (BX_CPU_THIS_PTR get_DF()) @@ -638,12 +654,10 @@ void BX_CPU_C::OUTSW_DXXw(bxInstruction_c *i) if (i->as32L()) wordCount = ECX; else - wordCount = CX; + wordCount = CX; - BX_ASSERT(wordCount > 0); wordCount = FastRepOUTSW(i, i->seg(), esi, DX, wordCount); - if (wordCount) - { + if (wordCount) { // Decrement eCX. Note, the main loop will decrement 1 also, so // decrement by one less than expected, like the case above. BX_TICKN(wordCount-1); // Main cpu loop also decrements one more. @@ -656,21 +670,20 @@ void BX_CPU_C::OUTSW_DXXw(bxInstruction_c *i) if (i->as32L()) RCX = ECX - (wordCount-1); else - CX -= (wordCount-1); + CX -= (wordCount-1); incr = wordCount << 1; // count * 2. - goto doIncr; + } + else { + read_virtual_word(i->seg(), esi, &value16); + BX_OUTP(DX, value16, 2); } } - -#endif - - read_virtual_word(i->seg(), esi, &value16); - BX_OUTP(DX, value16, 2); - incr = 2; - -#if (BX_SupportRepeatSpeedups) && (BX_DEBUGGER == 0) -doIncr: + else #endif + { + read_virtual_word(i->seg(), esi, &value16); + BX_OUTP(DX, value16, 2); + } #if BX_SUPPORT_X86_64 if (i->as64L()) { diff --git a/bochs/cpu/string.cc b/bochs/cpu/string.cc index 0f359458b..e2cf50695 100644 --- a/bochs/cpu/string.cc +++ b/bochs/cpu/string.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: string.cc,v 1.40 2007-10-10 22:20:32 sshwarts Exp $ +// $Id: string.cc,v 1.41 2007-10-29 15:39:18 sshwarts Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -49,7 +49,7 @@ Bit32u BX_CPU_C::FastRepMOVSB(bxInstruction_c *i, unsigned srcSeg, bx_address sr Bit32u bytesFitSrc, bytesFitDst; signed int pointerDelta; bx_address laddrDst, laddrSrc; - Bit32u paddrDst, paddrSrc; + Bit8u *hostAddrSrc, *hostAddrDst; bx_segment_reg_t *srcSegPtr = &BX_CPU_THIS_PTR sregs[srcSeg]; bx_segment_reg_t *dstSegPtr = &BX_CPU_THIS_PTR sregs[dstSeg]; @@ -61,6 +61,12 @@ Bit32u BX_CPU_C::FastRepMOVSB(bxInstruction_c *i, unsigned srcSeg, bx_address sr // without generating an exception. read_virtual_checks(srcSegPtr, srcOff, 1); laddrSrc = BX_CPU_THIS_PTR get_segment_base(srcSeg) + srcOff; + +#if BX_SupportGuest2HostTLB + hostAddrSrc = v2h_read_byte(laddrSrc, CPL==3); +#else + bx_phy_address paddrSrc; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrSrc = dtranslate_linear(laddrSrc, CPL==3, BX_READ); } @@ -70,14 +76,20 @@ Bit32u BX_CPU_C::FastRepMOVSB(bxInstruction_c *i, unsigned srcSeg, bx_address sr // If we want to write directly into the physical memory array, // we need the A20 address. - paddrSrc = A20ADDR(paddrSrc); - Bit8u *hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrSrc, BX_READ, DATA_ACCESS); + hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrSrc), BX_READ, DATA_ACCESS); +#endif if (! hostAddrSrc) return 0; write_virtual_checks(dstSegPtr, dstOff, 1); laddrDst = BX_CPU_THIS_PTR get_segment_base(dstSeg) + dstOff; + +#if BX_SupportGuest2HostTLB + hostAddrDst = v2h_write_byte(laddrDst, CPL==3); +#else + bx_phy_address paddrDst; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrDst = dtranslate_linear(laddrDst, CPL==3, BX_WRITE); } @@ -87,23 +99,23 @@ Bit32u BX_CPU_C::FastRepMOVSB(bxInstruction_c *i, unsigned srcSeg, bx_address sr // If we want to write directly into the physical memory array, // we need the A20 address. - paddrDst = A20ADDR(paddrDst); - Bit8u *hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrDst, BX_WRITE, DATA_ACCESS); + hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrDst), BX_WRITE, DATA_ACCESS); +#endif if (! hostAddrDst) return 0; // See how many bytes can fit in the rest of this page. if (BX_CPU_THIS_PTR get_DF()) { // Counting downward. - bytesFitSrc = 1 + (paddrSrc & 0xfff); - bytesFitDst = 1 + (paddrDst & 0xfff); + bytesFitSrc = 1 + (laddrSrc & 0xfff); + bytesFitDst = 1 + (laddrDst & 0xfff); pointerDelta = (signed int) -1; } else { // Counting upward. - bytesFitSrc = (0x1000 - (paddrSrc & 0xfff)); - bytesFitDst = (0x1000 - (paddrDst & 0xfff)); + bytesFitSrc = (0x1000 - (laddrSrc & 0xfff)); + bytesFitDst = (0x1000 - (laddrDst & 0xfff)); pointerDelta = (signed int) 1; } @@ -183,7 +195,7 @@ Bit32u BX_CPU_C::FastRepMOVSW(bxInstruction_c *i, unsigned srcSeg, bx_address sr Bit32u wordsFitSrc, wordsFitDst; signed int pointerDelta; bx_address laddrDst, laddrSrc; - Bit32u paddrDst, paddrSrc; + Bit8u *hostAddrSrc, *hostAddrDst; bx_segment_reg_t *srcSegPtr = &BX_CPU_THIS_PTR sregs[srcSeg]; bx_segment_reg_t *dstSegPtr = &BX_CPU_THIS_PTR sregs[dstSeg]; @@ -195,6 +207,12 @@ Bit32u BX_CPU_C::FastRepMOVSW(bxInstruction_c *i, unsigned srcSeg, bx_address sr // without generating an exception. read_virtual_checks(srcSegPtr, srcOff, 2); laddrSrc = BX_CPU_THIS_PTR get_segment_base(srcSeg) + srcOff; + +#if BX_SupportGuest2HostTLB + hostAddrSrc = v2h_read_byte(laddrSrc, CPL==3); +#else + bx_phy_address paddrSrc; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrSrc = dtranslate_linear(laddrSrc, CPL==3, BX_READ); } @@ -204,14 +222,20 @@ Bit32u BX_CPU_C::FastRepMOVSW(bxInstruction_c *i, unsigned srcSeg, bx_address sr // If we want to write directly into the physical memory array, // we need the A20 address. - paddrSrc = A20ADDR(paddrSrc); - Bit8u *hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrSrc, BX_READ, DATA_ACCESS); + hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrSrc), BX_READ, DATA_ACCESS); +#endif if (! hostAddrSrc) return 0; write_virtual_checks(dstSegPtr, dstOff, 2); laddrDst = BX_CPU_THIS_PTR get_segment_base(dstSeg) + dstOff; + +#if BX_SupportGuest2HostTLB + hostAddrDst = v2h_write_byte(laddrDst, CPL==3); +#else + bx_phy_address paddrDst; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrDst = dtranslate_linear(laddrDst, CPL==3, BX_WRITE); } @@ -221,9 +245,9 @@ Bit32u BX_CPU_C::FastRepMOVSW(bxInstruction_c *i, unsigned srcSeg, bx_address sr // If we want to write directly into the physical memory array, // we need the A20 address. - paddrDst = A20ADDR(paddrDst); - Bit8u *hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrDst, BX_WRITE, DATA_ACCESS); + hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrDst), BX_WRITE, DATA_ACCESS); +#endif if (! hostAddrDst) return 0; @@ -231,16 +255,16 @@ Bit32u BX_CPU_C::FastRepMOVSW(bxInstruction_c *i, unsigned srcSeg, bx_address sr if (BX_CPU_THIS_PTR get_DF()) { // Counting downward. // Note: 1st word must not cross page boundary. - if ( ((paddrSrc & 0xfff) > 0xffe) || ((paddrDst & 0xfff) > 0xffe) ) + if ( ((laddrSrc & 0xfff) > 0xffe) || ((laddrDst & 0xfff) > 0xffe) ) return 0; - wordsFitSrc = (2 + (paddrSrc & 0xfff)) >> 1; - wordsFitDst = (2 + (paddrDst & 0xfff)) >> 1; + wordsFitSrc = (2 + (laddrSrc & 0xfff)) >> 1; + wordsFitDst = (2 + (laddrDst & 0xfff)) >> 1; pointerDelta = (signed int) -2; } else { // Counting upward. - wordsFitSrc = (0x1000 - (paddrSrc & 0xfff)) >> 1; - wordsFitDst = (0x1000 - (paddrDst & 0xfff)) >> 1; + wordsFitSrc = (0x1000 - (laddrSrc & 0xfff)) >> 1; + wordsFitDst = (0x1000 - (laddrDst & 0xfff)) >> 1; pointerDelta = (signed int) 2; } @@ -321,7 +345,7 @@ Bit32u BX_CPU_C::FastRepMOVSD(bxInstruction_c *i, unsigned srcSeg, bx_address sr Bit32u dwordsFitSrc, dwordsFitDst; signed int pointerDelta; bx_address laddrDst, laddrSrc; - Bit32u paddrDst, paddrSrc; + Bit8u *hostAddrSrc, *hostAddrDst; bx_segment_reg_t *srcSegPtr = &BX_CPU_THIS_PTR sregs[srcSeg]; bx_segment_reg_t *dstSegPtr = &BX_CPU_THIS_PTR sregs[dstSeg]; @@ -333,6 +357,12 @@ Bit32u BX_CPU_C::FastRepMOVSD(bxInstruction_c *i, unsigned srcSeg, bx_address sr // without generating an exception. read_virtual_checks(srcSegPtr, srcOff, 4); laddrSrc = BX_CPU_THIS_PTR get_segment_base(srcSeg) + srcOff; + +#if BX_SupportGuest2HostTLB + hostAddrSrc = v2h_read_byte(laddrSrc, CPL==3); +#else + bx_phy_address paddrSrc; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrSrc = dtranslate_linear(laddrSrc, CPL==3, BX_READ); } @@ -342,14 +372,20 @@ Bit32u BX_CPU_C::FastRepMOVSD(bxInstruction_c *i, unsigned srcSeg, bx_address sr // If we want to write directly into the physical memory array, // we need the A20 address. - paddrSrc = A20ADDR(paddrSrc); - Bit8u *hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrSrc, BX_READ, DATA_ACCESS); + hostAddrSrc = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrSrc), BX_READ, DATA_ACCESS); +#endif if (! hostAddrSrc) return 0; write_virtual_checks(dstSegPtr, dstOff, 4); laddrDst = BX_CPU_THIS_PTR get_segment_base(dstSeg) + dstOff; + +#if BX_SupportGuest2HostTLB + hostAddrDst = v2h_write_byte(laddrDst, CPL==3); +#else + bx_phy_address paddrDst; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrDst = dtranslate_linear(laddrDst, CPL==3, BX_WRITE); } @@ -359,9 +395,9 @@ Bit32u BX_CPU_C::FastRepMOVSD(bxInstruction_c *i, unsigned srcSeg, bx_address sr // If we want to write directly into the physical memory array, // we need the A20 address. - paddrDst = A20ADDR(paddrDst); - Bit8u *hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrDst, BX_WRITE, DATA_ACCESS); + hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrDst), BX_WRITE, DATA_ACCESS); +#endif if (! hostAddrDst) return 0; @@ -369,16 +405,16 @@ Bit32u BX_CPU_C::FastRepMOVSD(bxInstruction_c *i, unsigned srcSeg, bx_address sr if (BX_CPU_THIS_PTR get_DF()) { // Counting downward. // Note: 1st dword must not cross page boundary. - if ( ((paddrSrc & 0xfff) > 0xffc) || ((paddrDst & 0xfff) > 0xffc) ) + if ( ((laddrSrc & 0xfff) > 0xffc) || ((laddrDst & 0xfff) > 0xffc) ) return 0; - dwordsFitSrc = (4 + (paddrSrc & 0xfff)) >> 2; - dwordsFitDst = (4 + (paddrDst & 0xfff)) >> 2; + dwordsFitSrc = (4 + (laddrSrc & 0xfff)) >> 2; + dwordsFitDst = (4 + (laddrDst & 0xfff)) >> 2; pointerDelta = (signed int) -4; } else { // Counting upward. - dwordsFitSrc = (0x1000 - (paddrSrc & 0xfff)) >> 2; - dwordsFitDst = (0x1000 - (paddrDst & 0xfff)) >> 2; + dwordsFitSrc = (0x1000 - (laddrSrc & 0xfff)) >> 2; + dwordsFitDst = (0x1000 - (laddrDst & 0xfff)) >> 2; pointerDelta = (signed int) 4; } @@ -459,12 +495,18 @@ Bit32u BX_CPU_C::FastRepSTOSB(bxInstruction_c *i, unsigned dstSeg, bx_address ds Bit32u bytesFitDst; signed int pointerDelta; bx_address laddrDst; - Bit32u paddrDst; + Bit8u *hostAddrDst; bx_segment_reg_t *dstSegPtr = &BX_CPU_THIS_PTR sregs[dstSeg]; write_virtual_checks(dstSegPtr, dstOff, 1); laddrDst = BX_CPU_THIS_PTR get_segment_base(dstSeg) + dstOff; + +#if BX_SupportGuest2HostTLB + hostAddrDst = v2h_write_byte(laddrDst, CPL==3); +#else + bx_phy_address paddrDst; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrDst = dtranslate_linear(laddrDst, CPL==3, BX_WRITE); } @@ -474,21 +516,21 @@ Bit32u BX_CPU_C::FastRepSTOSB(bxInstruction_c *i, unsigned dstSeg, bx_address ds // If we want to write directly into the physical memory array, // we need the A20 address. - paddrDst = A20ADDR(paddrDst); - Bit8u *hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrDst, BX_WRITE, DATA_ACCESS); + hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrDst), BX_WRITE, DATA_ACCESS); +#endif if (! hostAddrDst) return 0; // See how many bytes can fit in the rest of this page. if (BX_CPU_THIS_PTR get_DF()) { // Counting downward. - bytesFitDst = 1 + (paddrDst & 0xfff); + bytesFitDst = 1 + (laddrDst & 0xfff); pointerDelta = (signed int) -1; } else { // Counting upward. - bytesFitDst = (0x1000 - (paddrDst & 0xfff)); + bytesFitDst = (0x1000 - (laddrDst & 0xfff)); pointerDelta = (signed int) 1; } @@ -553,12 +595,18 @@ Bit32u BX_CPU_C::FastRepSTOSW(bxInstruction_c *i, unsigned dstSeg, bx_address ds Bit32u wordsFitDst; signed int pointerDelta; bx_address laddrDst; - Bit32u paddrDst; + Bit8u *hostAddrDst; bx_segment_reg_t *dstSegPtr = &BX_CPU_THIS_PTR sregs[dstSeg]; write_virtual_checks(dstSegPtr, dstOff, 2); laddrDst = BX_CPU_THIS_PTR get_segment_base(dstSeg) + dstOff; + +#if BX_SupportGuest2HostTLB + hostAddrDst = v2h_write_byte(laddrDst, CPL==3); +#else + bx_phy_address paddrDst; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrDst = dtranslate_linear(laddrDst, CPL==3, BX_WRITE); } @@ -568,9 +616,9 @@ Bit32u BX_CPU_C::FastRepSTOSW(bxInstruction_c *i, unsigned dstSeg, bx_address ds // If we want to write directly into the physical memory array, // we need the A20 address. - paddrDst = A20ADDR(paddrDst); - Bit8u *hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrDst, BX_WRITE, DATA_ACCESS); + hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrDst), BX_WRITE, DATA_ACCESS); +#endif if (! hostAddrDst) return 0; @@ -578,13 +626,13 @@ Bit32u BX_CPU_C::FastRepSTOSW(bxInstruction_c *i, unsigned dstSeg, bx_address ds if (BX_CPU_THIS_PTR get_DF()) { // Counting downward. // Note: 1st word must not cross page boundary. - if ((paddrDst & 0xfff) > 0xffe) return 0; - wordsFitDst = (2 + (paddrDst & 0xfff)) >> 1; + if ((laddrDst & 0xfff) > 0xffe) return 0; + wordsFitDst = (2 + (laddrDst & 0xfff)) >> 1; pointerDelta = (signed int) -2; } else { // Counting upward. - wordsFitDst = (0x1000 - (paddrDst & 0xfff)) >> 1; + wordsFitDst = (0x1000 - (laddrDst & 0xfff)) >> 1; pointerDelta = (signed int) 2; } @@ -650,12 +698,18 @@ Bit32u BX_CPU_C::FastRepSTOSD(bxInstruction_c *i, unsigned dstSeg, bx_address ds Bit32u dwordsFitDst; signed int pointerDelta; bx_address laddrDst; - Bit32u paddrDst; + Bit8u *hostAddrDst; bx_segment_reg_t *dstSegPtr = &BX_CPU_THIS_PTR sregs[dstSeg]; write_virtual_checks(dstSegPtr, dstOff, 4); laddrDst = BX_CPU_THIS_PTR get_segment_base(dstSeg) + dstOff; + +#if BX_SupportGuest2HostTLB + hostAddrDst = v2h_write_byte(laddrDst, CPL==3); +#else + bx_phy_address paddrDst; + if (BX_CPU_THIS_PTR cr0.get_PG()) { paddrDst = dtranslate_linear(laddrDst, CPL==3, BX_WRITE); } @@ -665,9 +719,9 @@ Bit32u BX_CPU_C::FastRepSTOSD(bxInstruction_c *i, unsigned dstSeg, bx_address ds // If we want to write directly into the physical memory array, // we need the A20 address. - paddrDst = A20ADDR(paddrDst); - Bit8u *hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, - paddrDst, BX_WRITE, DATA_ACCESS); + hostAddrDst = BX_CPU_THIS_PTR mem->getHostMemAddr(BX_CPU_THIS, + A20ADDR(paddrDst), BX_WRITE, DATA_ACCESS); +#endif if (! hostAddrDst) return 0; @@ -675,13 +729,13 @@ Bit32u BX_CPU_C::FastRepSTOSD(bxInstruction_c *i, unsigned dstSeg, bx_address ds if (BX_CPU_THIS_PTR get_DF()) { // Counting downward. // Note: 1st dword must not cross page boundary. - if ((paddrDst & 0xfff) > 0xffc) return 0; - dwordsFitDst = (4 + (paddrDst & 0xfff)) >> 2; + if ((laddrDst & 0xfff) > 0xffc) return 0; + dwordsFitDst = (4 + (laddrDst & 0xfff)) >> 2; pointerDelta = (signed int) -4; } else { // Counting upward. - dwordsFitDst = (0x1000 - (paddrDst & 0xfff)) >> 2; + dwordsFitDst = (0x1000 - (laddrDst & 0xfff)) >> 2; pointerDelta = (signed int) 4; } @@ -779,6 +833,7 @@ void BX_CPU_C::REP_MOVSQ_XqYq(bxInstruction_c *i) void BX_CPU_C::MOVSB_XbYb(bxInstruction_c *i) { Bit8u temp8; + Bit32u incr = 1; #if BX_SUPPORT_X86_64 if (i->as64L()) { @@ -806,44 +861,57 @@ void BX_CPU_C::MOVSB_XbYb(bxInstruction_c *i) #endif // #if BX_SUPPORT_X86_64 if (i->as32L()) { - Bit32u esi = ESI; - Bit32u edi = EDI; - - read_virtual_byte(i->seg(), esi, &temp8); - write_virtual_byte(BX_SEG_REG_ES, edi, &temp8); - - if (BX_CPU_THIS_PTR get_DF()) { - /* decrement ESI, EDI */ - esi--; - edi--; - } - else { - /* increment ESI, EDI */ - esi++; - edi++; - } - - // zero extension of RSI/RDI - RSI = esi; - RDI = edi; - } - else - { /* 16 bit address mode */ - unsigned incr = 1; - - Bit16u si = SI; - Bit16u di = DI; - #if (BX_SupportRepeatSpeedups) && (BX_DEBUGGER == 0) /* If conditions are right, we can transfer IO to physical memory * in a batch, rather than one instruction at a time */ if (i->repUsedL() && !BX_CPU_THIS_PTR async_event) { - Bit32u byteCount = CX; - BX_ASSERT(byteCount > 0); - byteCount = FastRepMOVSB(i, i->seg(), si, BX_SEG_REG_ES, di, byteCount); - if (byteCount) - { + Bit32u byteCount = FastRepMOVSB(i, i->seg(), ESI, BX_SEG_REG_ES, EDI, ECX); + if (byteCount) { + // Decrement the ticks count by the number of iterations, minus + // one, since the main cpu loop will decrement one. Also, + // the count is predecremented before examined, so defintely + // don't roll it under zero. + BX_TICKN(byteCount-1); + + // Decrement eCX. Note, the main loop will decrement 1 also, so + // decrement by one less than expected, like the case above. + RCX = ECX - (byteCount-1); + + incr = byteCount; + } + else { + read_virtual_byte(i->seg(), ESI, &temp8); + write_virtual_byte(BX_SEG_REG_ES, EDI, &temp8); + } + } + else +#endif + { + read_virtual_byte(i->seg(), ESI, &temp8); + write_virtual_byte(BX_SEG_REG_ES, EDI, &temp8); + } + + if (BX_CPU_THIS_PTR get_DF()) { + /* decrement ESI, EDI */ + RSI = ESI - incr; + RDI = EDI - incr; + } + else { + /* increment ESI, EDI */ + RSI = ESI + incr; + RDI = EDI + incr; + } + } + else /* 16 bit address mode */ + { +#if (BX_SupportRepeatSpeedups) && (BX_DEBUGGER == 0) + /* If conditions are right, we can transfer IO to physical memory + * in a batch, rather than one instruction at a time */ + if (i->repUsedL() && !BX_CPU_THIS_PTR async_event) + { + Bit32u byteCount = FastRepMOVSB(i, i->seg(), SI, BX_SEG_REG_ES, DI, CX); + if (byteCount) { // Decrement the ticks count by the number of iterations, minus // one, since the main cpu loop will decrement one. Also, // the count is predecremented before examined, so defintely @@ -855,31 +923,29 @@ void BX_CPU_C::MOVSB_XbYb(bxInstruction_c *i) CX -= (byteCount-1); incr = byteCount; - goto doIncr; + } + else { + read_virtual_byte(i->seg(), SI, &temp8); + write_virtual_byte(BX_SEG_REG_ES, DI, &temp8); } } + else #endif - - read_virtual_byte(i->seg(), si, &temp8); - write_virtual_byte(BX_SEG_REG_ES, di, &temp8); - -#if (BX_SupportRepeatSpeedups) && (BX_DEBUGGER == 0) -doIncr: -#endif + { + read_virtual_byte(i->seg(), SI, &temp8); + write_virtual_byte(BX_SEG_REG_ES, DI, &temp8); + } if (BX_CPU_THIS_PTR get_DF()) { /* decrement SI, DI */ - si -= incr; - di -= incr; + SI -= incr; + DI -= incr; } else { /* increment SI, DI */ - si += incr; - di += incr; + SI += incr; + DI += incr; } - - SI = si; - DI = di; } } @@ -931,8 +997,8 @@ void BX_CPU_C::MOVSW_XwYw(bxInstruction_c *i) RSI = esi; RDI = edi; } - else - { /* 16bit address mode */ + else /* 16bit address mode */ + { unsigned incr = 2; Bit16u si = SI; @@ -944,11 +1010,8 @@ void BX_CPU_C::MOVSW_XwYw(bxInstruction_c *i) */ if (i->repUsedL() && !BX_CPU_THIS_PTR async_event) { - Bit32u wordCount = CX; - BX_ASSERT(wordCount > 0); - wordCount = FastRepMOVSW(i, i->seg(), si, BX_SEG_REG_ES, di, wordCount); - if (wordCount) - { + Bit32u wordCount = FastRepMOVSW(i, i->seg(), si, BX_SEG_REG_ES, di, CX); + if (wordCount) { // Decrement the ticks count by the number of iterations, minus // one, since the main cpu loop will decrement one. Also, // the count is predecremented before examined, so defintely @@ -960,17 +1023,18 @@ void BX_CPU_C::MOVSW_XwYw(bxInstruction_c *i) CX -= (wordCount-1); incr = wordCount << 1; // count * 2 - goto doIncr; + } + else { + read_virtual_word(i->seg(), si, &temp16); + write_virtual_word(BX_SEG_REG_ES, di, &temp16); } } + else #endif - - read_virtual_word(i->seg(), si, &temp16); - write_virtual_word(BX_SEG_REG_ES, di, &temp16); - -#if (BX_SupportRepeatSpeedups) && (BX_DEBUGGER == 0) -doIncr: -#endif + { + read_virtual_word(i->seg(), si, &temp16); + write_virtual_word(BX_SEG_REG_ES, di, &temp16); + } if (BX_CPU_THIS_PTR get_DF()) { /* decrement SI, DI */ @@ -1028,11 +1092,8 @@ void BX_CPU_C::MOVSD_XdYd(bxInstruction_c *i) */ if (i->repUsedL() && !BX_CPU_THIS_PTR async_event) { - Bit32u dwordCount = ECX; - BX_ASSERT(dwordCount > 0); - dwordCount = FastRepMOVSD(i, i->seg(), esi, BX_SEG_REG_ES, edi, dwordCount); - if (dwordCount) - { + Bit32u dwordCount = FastRepMOVSD(i, i->seg(), esi, BX_SEG_REG_ES, edi, ECX); + if (dwordCount) { // Decrement the ticks count by the number of iterations, minus // one, since the main cpu loop will decrement one. Also, // the count is predecremented before examined, so defintely @@ -1044,17 +1105,18 @@ void BX_CPU_C::MOVSD_XdYd(bxInstruction_c *i) RCX = ECX - (dwordCount-1); incr = dwordCount << 2; // count * 4 - goto doIncr; + } + else { + read_virtual_dword(i->seg(), esi, &temp32); + write_virtual_dword(BX_SEG_REG_ES, edi, &temp32); } } + else #endif - - read_virtual_dword(i->seg(), esi, &temp32); - write_virtual_dword(BX_SEG_REG_ES, edi, &temp32); - -#if (BX_SupportRepeatSpeedups) && (BX_DEBUGGER == 0) -doIncr: -#endif + { + read_virtual_dword(i->seg(), esi, &temp32); + write_virtual_dword(BX_SEG_REG_ES, edi, &temp32); + } if (BX_CPU_THIS_PTR get_DF()) { esi -= incr; @@ -1816,12 +1878,10 @@ void BX_CPU_C::STOSB_YbAL(bxInstruction_c *i) if (i->as32L()) byteCount = ECX; else - byteCount = CX; + byteCount = CX; - BX_ASSERT(byteCount); byteCount = FastRepSTOSB(i, BX_SEG_REG_ES, edi, al, byteCount); - if (byteCount) - { + if (byteCount) { // Decrement the ticks count by the number of iterations, minus // one, since the main cpu loop will decrement one. Also, // the count is predecremented before examined, so defintely @@ -1836,16 +1896,16 @@ void BX_CPU_C::STOSB_YbAL(bxInstruction_c *i) CX -= (byteCount-1); incr = byteCount; - goto doIncr; + } + else { + write_virtual_byte(BX_SEG_REG_ES, edi, &al); } } + else #endif - - write_virtual_byte(BX_SEG_REG_ES, edi, &al); - -#if (BX_SupportRepeatSpeedups) && (BX_DEBUGGER == 0) -doIncr: -#endif + { + write_virtual_byte(BX_SEG_REG_ES, edi, &al); + } if (BX_CPU_THIS_PTR get_DF()) { edi -= incr;