From fda1b874e997a22cfc0bd43adf0889f2c2895b6b Mon Sep 17 00:00:00 2001 From: Gregory Alexander Date: Mon, 3 Jun 2002 22:39:11 +0000 Subject: [PATCH] Check in FETCHDECODE Caching, with changes. Specific changes from the patch: 1.) renamed fdcache_eip to fdcache_ip, as it is using the RIP instead of the EIP. 2.) added a Boolean array fdcache_is32 which uses is32 to determine icache hits. Otherwise we could run 32-bit code as 16-bit or vice versa. Modified Files: config.h.in cpu/cpu.cc cpu/cpu.h memory/memory.cc --- bochs/config.h.in | 9 ++++ bochs/cpu/cpu.cc | 104 ++++++++++++++++++++++++++++++----------- bochs/cpu/cpu.h | 10 +++- bochs/memory/memory.cc | 23 ++++++++- 4 files changed, 117 insertions(+), 29 deletions(-) diff --git a/bochs/config.h.in b/bochs/config.h.in index 8e61cb94c..f17aa48d7 100644 --- a/bochs/config.h.in +++ b/bochs/config.h.in @@ -546,6 +546,15 @@ typedef unsigned int Boolean; #define BX_DYNAMIC_CPU_I386 0 #define BX_DYNAMIC_CPU_SPARC 0 +// caching of fetchdecode() calls +#define BX_FETCHDECODE_CACHE 0 + +#if BX_FETCHDECODE_CACHE + // The number of entries. MUST be a power of 2 + #define BX_FDCACHE_SIZE 0x0800 + #define BX_FDCACHE_MASK (BX_FDCACHE_SIZE-1) +#endif // BX_FETCHDECODE_CACHE + #define BX_SUPPORT_FPU 0 #define BX_HAVE_GETENV 0 diff --git a/bochs/cpu/cpu.cc b/bochs/cpu/cpu.cc index 5b5e2c8ce..303dcba65 100644 --- a/bochs/cpu/cpu.cc +++ b/bochs/cpu/cpu.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: cpu.cc,v 1.28 2002-04-18 00:22:19 bdenney Exp $ +// $Id: cpu.cc,v 1.29 2002-06-03 22:39:10 yakovlev Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -37,7 +37,12 @@ //unsigned counter[2] = { 0, 0 }; +#if BX_FETCHDECODE_CACHE + static unsigned long bx_fdcache_sel; + static unsigned long bx_fdcache_ip; + static Bit32u new_phy_addr; +#endif // BX_FETCHDECODE_CACHE #if BX_SIM_ID == 0 // only need to define once // This array defines a look-up table for the even parity-ness @@ -106,11 +111,16 @@ extern void REGISTER_IADDR(Bit32u addr); BX_CPU_C::cpu_loop(Bit32s max_instr_count) { unsigned ret; - BxInstruction_t i; + BxInstruction_t *i; unsigned maxisize; Bit8u *fetch_ptr; Boolean is_32; +#if !BX_FETCHDECODE_CACHE + BxInstruction_t bxinstruction_dummy; + i = &bxinstruction_dummy; +#endif // #if BX_FETCHDECODE_CACHE + #if BX_DEBUGGER BX_CPU_THIS_PTR break_point = 0; #ifdef MAGIC_BREAKPOINT @@ -214,18 +224,49 @@ async_events_processed: } fetch_ptr = BX_CPU_THIS_PTR fetch_ptr; +#if BX_FETCHDECODE_CACHE + bx_fdcache_ip = new_phy_addr; + bx_fdcache_sel = bx_fdcache_ip & BX_FDCACHE_MASK; + + i = &(BX_CPU_THIS_PTR fdcache_i[bx_fdcache_sel]); + + if ((BX_CPU_THIS_PTR fdcache_ip[bx_fdcache_sel] == bx_fdcache_ip) && + (BX_CPU_THIS_PTR fdcache_is32[bx_fdcache_sel] == is_32)) { + // HIT! ;^) + ret = 1; // success! + new_phy_addr += i->ilen; + } else { + // MISS :'( +#endif // #if BX_FETCHDECODE_CACHE + maxisize = 16; - if (BX_CPU_THIS_PTR bytesleft < 16) + if (BX_CPU_THIS_PTR bytesleft < 16) { maxisize = BX_CPU_THIS_PTR bytesleft; - ret = FetchDecode(fetch_ptr, &i, maxisize, is_32); + } + ret = FetchDecode(fetch_ptr, i, maxisize, is_32); + +#if BX_FETCHDECODE_CACHE + // The instruction straddles a page boundary. + // Not storing such instructions in the cache is probably the + // easiest way to handle them + if (ret) { + BX_CPU_THIS_PTR fdcache_ip[bx_fdcache_sel] = bx_fdcache_ip; + BX_CPU_THIS_PTR fdcache_is32[bx_fdcache_sel] = is_32; + new_phy_addr += i->ilen; + } else { + // Invalidate cache! + BX_CPU_THIS_PTR fdcache_ip[bx_fdcache_sel] = 0xFFFFFFFF; + } + } +#endif // #if BX_FETCHDECODE_CACHE if (ret) { - if (i.ResolveModrm) { + if (i->ResolveModrm) { // call method on BX_CPU_C object - BX_CPU_CALL_METHOD(i.ResolveModrm, (&i)); + BX_CPU_CALL_METHOD(i->ResolveModrm, (i)); } - BX_CPU_THIS_PTR fetch_ptr += i.ilen; - BX_CPU_THIS_PTR bytesleft -= i.ilen; + BX_CPU_THIS_PTR fetch_ptr += i->ilen; + BX_CPU_THIS_PTR bytesleft -= i->ilen; fetch_decode_OK: #if BX_DEBUGGER @@ -239,34 +280,34 @@ fetch_decode_OK: } #endif - if (i.rep_used && (i.attr & BxRepeatable)) { + if (i->rep_used && (i->attr & BxRepeatable)) { repeat_loop: - if (i.attr & BxRepeatableZF) { - if (i.as_32) { + if (i->attr & BxRepeatableZF) { + if (i->as_32) { if (ECX != 0) { - BX_CPU_CALL_METHOD(i.execute, (&i)); + BX_CPU_CALL_METHOD(i->execute, (i)); ECX -= 1; } - if ((i.rep_used==0xf3) && (get_ZF()==0)) goto repeat_done; - if ((i.rep_used==0xf2) && (get_ZF()!=0)) goto repeat_done; + if ((i->rep_used==0xf3) && (get_ZF()==0)) goto repeat_done; + if ((i->rep_used==0xf2) && (get_ZF()!=0)) goto repeat_done; if (ECX == 0) goto repeat_done; goto repeat_not_done; } else { if (CX != 0) { - BX_CPU_CALL_METHOD(i.execute, (&i)); + BX_CPU_CALL_METHOD(i->execute, (i)); CX -= 1; } - if ((i.rep_used==0xf3) && (get_ZF()==0)) goto repeat_done; - if ((i.rep_used==0xf2) && (get_ZF()!=0)) goto repeat_done; + if ((i->rep_used==0xf3) && (get_ZF()==0)) goto repeat_done; + if ((i->rep_used==0xf2) && (get_ZF()!=0)) goto repeat_done; if (CX == 0) goto repeat_done; goto repeat_not_done; } } else { // normal repeat, no concern for ZF - if (i.as_32) { + if (i->as_32) { if (ECX != 0) { - BX_CPU_CALL_METHOD(i.execute, (&i)); + BX_CPU_CALL_METHOD(i->execute, (i)); ECX -= 1; } if (ECX == 0) goto repeat_done; @@ -274,7 +315,7 @@ repeat_loop: } else { // 16bit addrsize if (CX != 0) { - BX_CPU_CALL_METHOD(i.execute, (&i)); + BX_CPU_CALL_METHOD(i->execute, (i)); CX -= 1; } if (CX == 0) goto repeat_done; @@ -302,12 +343,12 @@ repeat_not_done: repeat_done: - BX_CPU_THIS_PTR eip += i.ilen; + BX_CPU_THIS_PTR eip += i->ilen; } else { // non repeating instruction - BX_CPU_THIS_PTR eip += i.ilen; - BX_CPU_CALL_METHOD(i.execute, (&i)); + BX_CPU_THIS_PTR eip += i->ilen; + BX_CPU_CALL_METHOD(i->execute, (i)); } BX_CPU_THIS_PTR prev_eip = EIP; // commit new EIP @@ -410,17 +451,22 @@ static Bit8u FetchBuffer[16]; for (; j<16; j++) { FetchBuffer[j] = *temp_ptr++; } - ret = FetchDecode(FetchBuffer, &i, 16, is_32); + ret = FetchDecode(FetchBuffer, i, 16, is_32); if (ret==0) BX_PANIC(("fetchdecode: cross boundary: ret==0")); - if (i.ResolveModrm) { - BX_CPU_CALL_METHOD(i.ResolveModrm, (&i)); + if (i->ResolveModrm) { + BX_CPU_CALL_METHOD(i->ResolveModrm, (i)); } - remain = i.ilen - remain; + remain = i->ilen - remain; // note: eip has already been advanced to beginning of page BX_CPU_THIS_PTR fetch_ptr = fetch_ptr + remain; BX_CPU_THIS_PTR bytesleft -= remain; + + #if BX_FETCHDECODE_CACHE + new_phy_addr += remain; + #endif // BX_FETCHDECODE_CACHE + //BX_CPU_THIS_PTR eip += remain; BX_CPU_THIS_PTR eip = BX_CPU_THIS_PTR prev_eip; goto fetch_decode_OK; @@ -603,7 +649,9 @@ BX_CPU_C::prefetch(void) // cs:eIP // prefetch QSIZE byte quantity aligned on corresponding boundary Bit32u new_linear_addr; +#if !BX_FETCHDECODE_CACHE Bit32u new_phy_addr; +#endif // !BX_FETCHDECODE_CACHE Bit32u temp_eip, temp_limit; temp_eip = BX_CPU_THIS_PTR eip; @@ -664,7 +712,9 @@ BX_CPU_C::prefetch(void) BX_CPU_C::revalidate_prefetch_q(void) { Bit32u new_linear_addr, new_linear_page, new_linear_offset; +#if !BX_FETCHDECODE_CACHE Bit32u new_phy_addr; +#endif // !BX_FETCHDECODE_CACHE new_linear_addr = BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].cache.u.segment.base + BX_CPU_THIS_PTR eip; diff --git a/bochs/cpu/cpu.h b/bochs/cpu/cpu.h index fff76b10c..da979b271 100644 --- a/bochs/cpu/cpu.h +++ b/bochs/cpu/cpu.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: cpu.h,v 1.19 2002-04-18 00:22:19 bdenney Exp $ +// $Id: cpu.h,v 1.20 2002-06-03 22:39:10 yakovlev Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -1582,6 +1582,14 @@ public: // for now... bx_local_apic_c local_apic; Boolean int_from_local_apic; #endif + + #if BX_FETCHDECODE_CACHE + Bit32u fdcache_ip[BX_FDCACHE_SIZE]; // will store operation's IP + // NOTE: This struct should really be aligned! + BxInstruction_t fdcache_i[BX_FDCACHE_SIZE]; // stores decoded instruction + Boolean fdcache_is32[BX_FDCACHE_SIZE]; + #endif // #if BX_FETCHDECODE_CACHE + }; diff --git a/bochs/memory/memory.cc b/bochs/memory/memory.cc index 66031339f..7e11fd095 100644 --- a/bochs/memory/memory.cc +++ b/bochs/memory/memory.cc @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////// -// $Id: memory.cc,v 1.11 2002-04-03 16:48:15 japj Exp $ +// $Id: memory.cc,v 1.12 2002-06-03 22:39:11 yakovlev Exp $ ///////////////////////////////////////////////////////////////////////// // // Copyright (C) 2001 MandrakeSoft S.A. @@ -49,6 +49,27 @@ BX_MEM_C::write_physical(BX_CPU_C *cpu, Bit32u addr, unsigned len, void *data) a20addr = A20ADDR(addr); BX_INSTR_PHY_WRITE(a20addr, len); +#if BX_FETCHDECODE_CACHE + // NOTE: This piece should be put, if possible, where a write to the memory + // takes place. + // Here it trashes cache even for writes that would end up to ROM + + // Invalidate instruction cache for written addresses + // Instructions can be up to 16 bytes long, so I have to trash up to 15 bytes + // before write address (costly!) + // I think it would NOT be safe to invalidate up to the last instruction + // before the write because there COULD be programs which use + // jump-in-the-middle-of-an-instruction schemes (esp. copyprotection + // schemes) + unsigned long bx_fdcache_idx = addr - 15; + for (int count = 15+len; count > 0; --count) { + if (cpu->fdcache_ip[bx_fdcache_idx & BX_FDCACHE_MASK] == bx_fdcache_idx) { + cpu->fdcache_ip[bx_fdcache_idx & BX_FDCACHE_MASK] = 0xFFFFFFFF; + } + ++bx_fdcache_idx; + } +#endif // #if BX_FETCHDECODE_CACHE + #if BX_DEBUGGER // (mch) Check for physical write break points, TODO // (bbd) Each breakpoint should have an associated CPU#, TODO