Merge softfloat-fpu-implementation_ver4_branch branch

2004-06-18 14:11:11 +00:00 · 2004-06-18 14:11:11 +00:00 · 5c5b556f24
commit 5c5b556f24
parent 32a36a3ca7
89 changed files with 9419 additions and 14356 deletions
--- a/bochs/.conf.win32-cygwin
+++ b/bochs/.conf.win32-cygwin
@ -6,7 +6,7 @@

 CC="gcc"
 CXX="g++"
-CFLAGS="-O6 -march=pentium"
+CFLAGS="-O3 -march=pentium"
 CXXFLAGS="$CFLAGS"

 export CC
@ -17,5 +17,8 @@ export CXXFLAGS
 ./configure --enable-cdrom \
            --enable-sb16=win \
 	    --enable-ne2000 \
+            --enable-pci \
 	    --enable-vbe \
-		--enable-all-optimizations
+            --enable-all-optimizations \
+            --enable-cpu-level=6 \
+            --enable-sse=2 
--- a/bochs/config.h.in
+++ b/bochs/config.h.in
@ -731,6 +731,7 @@ typedef
 #define BX_SupportPAE 0
 #define BX_SupportICache 0
 #define BX_SupportHostAsms 0
+#define BX_SupportHostAsmsFpu 0


 // if 1, don't do gpf on MSRs that we don't implement
--- a/bochs/cpu/3dnow.cc
+++ b/bochs/cpu/3dnow.cc
@ -24,16 +24,14 @@

 #if BX_SUPPORT_3DNOW

-#include "softfloat.h"
-
 static void prepare_softfloat_status_word
 	(softfloat_status_word_t &status, int rounding_mode)
 {
-  status.float_detect_tininess = float_tininess_after_rounding;
  status.float_exception_flags = 0; // clear exceptions before execution
  status.float_nan_handling_mode = float_first_operand_nan;
  status.float_rounding_mode = rounding_mode;
  status.flush_underflow_to_zero = 0;
+  status.float_precision_lost_up = 0;
 }

 void BX_CPU_C::PFPNACC_PqQq(bxInstruction_c *i)
--- a/bochs/cpu/Makefile.in
+++ b/bochs/cpu/Makefile.in
--- a/bochs/cpu/access.cc
+++ b/bochs/cpu/access.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: access.cc,v 1.42 2003-10-24 20:06:12 sshwarts Exp $
+// $Id: access.cc,v 1.43 2004-06-18 14:11:06 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -1234,3 +1234,23 @@ BX_CPU_C::write_virtual_dqword_aligned(unsigned s, bx_address offset, Bit8u *dat

  write_virtual_dqword(s, offset, data);
 }
+
+#if BX_SUPPORT_FPU
+
+  void BX_CPP_AttrRegparmN(3)
+BX_CPU_C::read_virtual_tword(unsigned s, bx_address offset, floatx80 *data)
+{
+  // read floating point register
+  read_virtual_qword(s, offset+0, &data->fraction);
+  read_virtual_word (s, offset+8, &data->exp);
+}
+
+  void BX_CPP_AttrRegparmN(3)
+BX_CPU_C::write_virtual_tword(unsigned s, bx_address offset, floatx80 *data)
+{
+  // store floating point register
+  write_virtual_qword(s, offset+0, &data->fraction);
+  write_virtual_word (s, offset+8, &data->exp);
+}
+
+#endif
--- a/bochs/cpu/cpu.h
+++ b/bochs/cpu/cpu.h
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: cpu.h,v 1.163 2004-05-10 21:05:47 sshwarts Exp $
+// $Id: cpu.h,v 1.164 2004-06-18 14:11:06 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -1179,7 +1179,7 @@ typedef struct {

 class BX_MEM_C;

-#if BX_SUPPORT_FPU || BX_SUPPORT_MMX
+#if BX_SUPPORT_FPU
 #include "cpu/i387.h"
 #endif

@ -1874,6 +1874,7 @@ union {
  BX_SMF void FWAIT(bxInstruction_c *);

 #if BX_SUPPORT_FPU
+  // load/store
  BX_SMF void FLD_STi(bxInstruction_c *);  
  BX_SMF void FLD_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FLD_DOUBLE_REAL(bxInstruction_c *);
@ -1881,8 +1882,36 @@ union {
  BX_SMF void FILD_WORD_INTEGER(bxInstruction_c *);
  BX_SMF void FILD_DWORD_INTEGER(bxInstruction_c *);
  BX_SMF void FILD_QWORD_INTEGER(bxInstruction_c *);  
+  BX_SMF void FBLD_PACKED_BCD(bxInstruction_c *);
+
+  BX_SMF void FST_STi(bxInstruction_c *);
+  BX_SMF void FST_SINGLE_REAL(bxInstruction_c *);
+  BX_SMF void FST_DOUBLE_REAL(bxInstruction_c *);
+  BX_SMF void FSTP_EXTENDED_REAL(bxInstruction_c *);
+  BX_SMF void FIST_WORD_INTEGER(bxInstruction_c *);
+  BX_SMF void FIST_DWORD_INTEGER(bxInstruction_c *);
+  BX_SMF void FISTP_QWORD_INTEGER(bxInstruction_c *);
+  BX_SMF void FBSTP_PACKED_BCD(bxInstruction_c *);
+
+  BX_SMF void FISTTP16(bxInstruction_c *);
+  BX_SMF void FISTTP32(bxInstruction_c *);
+  BX_SMF void FISTTP64(bxInstruction_c *);
+
+  // control
+  BX_SMF void FNINIT(bxInstruction_c *);
+  BX_SMF void FNCLEX(bxInstruction_c *);
+
+  BX_SMF void FRSTOR(bxInstruction_c *);
+  BX_SMF void FNSAVE(bxInstruction_c *);
  BX_SMF void FLDENV(bxInstruction_c *);
+  BX_SMF void FNSTENV(bxInstruction_c *);
+
  BX_SMF void FLDCW(bxInstruction_c *);
+  BX_SMF void FNSTCW(bxInstruction_c *);
+  BX_SMF void FNSTSW(bxInstruction_c *);
+  BX_SMF void FNSTSW_AX(bxInstruction_c *);
+
+  // const
  BX_SMF void FLD1(bxInstruction_c *); 
  BX_SMF void FLDL2T(bxInstruction_c *);
  BX_SMF void FLDL2E(bxInstruction_c *);
@ -1890,41 +1919,10 @@ union {
  BX_SMF void FLDLG2(bxInstruction_c *);
  BX_SMF void FLDLN2(bxInstruction_c *);
  BX_SMF void FLDZ(bxInstruction_c *);                        
-  BX_SMF void FBLD_PACKED_BCD(bxInstruction_c *);
-
-  // store
-  BX_SMF void FST_STi(bxInstruction_c *);
-  BX_SMF void FSTP_STi(bxInstruction_c *);
-  BX_SMF void FST_SINGLE_REAL(bxInstruction_c *);
-  BX_SMF void FSTP_SINGLE_REAL(bxInstruction_c *);
-  BX_SMF void FST_DOUBLE_REAL(bxInstruction_c *);
-  BX_SMF void FSTP_DOUBLE_REAL(bxInstruction_c *);
-  BX_SMF void FSTP_EXTENDED_REAL(bxInstruction_c *);
-
-  BX_SMF void FIST_WORD_INTEGER(bxInstruction_c *);
-  BX_SMF void FISTP_WORD_INTEGER(bxInstruction_c *);
-  BX_SMF void FIST_DWORD_INTEGER(bxInstruction_c *);
-  BX_SMF void FISTP_DWORD_INTEGER(bxInstruction_c *);
-  BX_SMF void FISTP_QWORD_INTEGER(bxInstruction_c *);
-  
-  BX_SMF void FNSTENV(bxInstruction_c *);
-  BX_SMF void FNSTCW(bxInstruction_c *);
-  BX_SMF void FNSTSW(bxInstruction_c *);
-  BX_SMF void FNSTSW_AX(bxInstruction_c *);
-  BX_SMF void FBSTP_PACKED_BCD(bxInstruction_c *);
-
-  BX_SMF void FISTTP16(bxInstruction_c *);
-  BX_SMF void FISTTP32(bxInstruction_c *);
-  BX_SMF void FISTTP64(bxInstruction_c *);
-
-  // save restore
-  BX_SMF void FRSTOR(bxInstruction_c *);
-  BX_SMF void FNSAVE(bxInstruction_c *);

  // add
  BX_SMF void FADD_ST0_STj(bxInstruction_c *);
  BX_SMF void FADD_STi_ST0(bxInstruction_c *);
-  BX_SMF void FADDP_STi_ST0(bxInstruction_c *);
  BX_SMF void FADD_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FADD_DOUBLE_REAL(bxInstruction_c *);
  BX_SMF void FIADD_WORD_INTEGER(bxInstruction_c *);
@ -1933,7 +1931,6 @@ union {
  // mul
  BX_SMF void FMUL_ST0_STj(bxInstruction_c *);
  BX_SMF void FMUL_STi_ST0(bxInstruction_c *);
-  BX_SMF void FMULP_STi_ST0(bxInstruction_c *);
  BX_SMF void FMUL_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FMUL_DOUBLE_REAL(bxInstruction_c *);
  BX_SMF void FIMUL_WORD_INTEGER (bxInstruction_c *);
@ -1944,13 +1941,11 @@ union {
  BX_SMF void FSUBR_ST0_STj(bxInstruction_c *);
  BX_SMF void FSUB_STi_ST0(bxInstruction_c *);
  BX_SMF void FSUBR_STi_ST0(bxInstruction_c *);
-  BX_SMF void FSUBP_STi_ST0(bxInstruction_c *);
-  BX_SMF void FSUBRP_STi_ST0(bxInstruction_c *);
-
  BX_SMF void FSUB_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FSUBR_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FSUB_DOUBLE_REAL(bxInstruction_c *);
  BX_SMF void FSUBR_DOUBLE_REAL(bxInstruction_c *);
+
  BX_SMF void FISUB_WORD_INTEGER(bxInstruction_c *);
  BX_SMF void FISUBR_WORD_INTEGER(bxInstruction_c *);
  BX_SMF void FISUB_DWORD_INTEGER(bxInstruction_c *);
@ -1961,13 +1956,11 @@ union {
  BX_SMF void FDIVR_ST0_STj(bxInstruction_c *);
  BX_SMF void FDIV_STi_ST0(bxInstruction_c *);
  BX_SMF void FDIVR_STi_ST0(bxInstruction_c *);
-  BX_SMF void FDIVP_STi_ST0(bxInstruction_c *);
-  BX_SMF void FDIVRP_STi_ST0(bxInstruction_c *);
-
  BX_SMF void FDIV_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FDIVR_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FDIV_DOUBLE_REAL(bxInstruction_c *);
  BX_SMF void FDIVR_DOUBLE_REAL(bxInstruction_c *);
+
  BX_SMF void FIDIV_WORD_INTEGER(bxInstruction_c *);
  BX_SMF void FIDIVR_WORD_INTEGER(bxInstruction_c *);
  BX_SMF void FIDIV_DWORD_INTEGER(bxInstruction_c *);
@ -1975,43 +1968,29 @@ union {

  // compare
  BX_SMF void FCOM_STi(bxInstruction_c *);
-  BX_SMF void FCOMP_STi(bxInstruction_c *);
-  BX_SMF void FCOMI_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCOMIP_ST0_STj(bxInstruction_c *);
-  BX_SMF void FUCOMI_ST0_STj(bxInstruction_c *);
-  BX_SMF void FUCOMIP_ST0_STj(bxInstruction_c *);
  BX_SMF void FUCOM_STi(bxInstruction_c *);
-  BX_SMF void FUCOMP_STi(bxInstruction_c *);
-
+  BX_SMF void FCOMI_ST0_STj(bxInstruction_c *);
+  BX_SMF void FUCOMI_ST0_STj(bxInstruction_c *);
  BX_SMF void FCOM_SINGLE_REAL(bxInstruction_c *);
-  BX_SMF void FCOMP_SINGLE_REAL(bxInstruction_c *);
  BX_SMF void FCOM_DOUBLE_REAL(bxInstruction_c *);
-  BX_SMF void FCOMP_DOUBLE_REAL(bxInstruction_c *);
  BX_SMF void FICOM_WORD_INTEGER(bxInstruction_c *);
-  BX_SMF void FICOMP_WORD_INTEGER(bxInstruction_c *);
  BX_SMF void FICOM_DWORD_INTEGER(bxInstruction_c *);
-  BX_SMF void FICOMP_DWORD_INTEGER(bxInstruction_c *);
+  BX_SMF void FCMOV_ST0_STj(bxInstruction_c *);

  BX_SMF void FCOMPP(bxInstruction_c *);  
  BX_SMF void FUCOMPP(bxInstruction_c *);

-  // mov
-  BX_SMF void FCMOVB_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCMOVE_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCMOVBE_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCMOVU_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCMOVNB_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCMOVNE_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCMOVNBE_ST0_STj(bxInstruction_c *);
-  BX_SMF void FCMOVNU_ST0_STj(bxInstruction_c *);
-
  // misc
  BX_SMF void FXCH_STi(bxInstruction_c *);
  BX_SMF void FNOP(bxInstruction_c *);
+  BX_SMF void FPLEGACY(bxInstruction_c *);
  BX_SMF void FCHS(bxInstruction_c *);
  BX_SMF void FABS(bxInstruction_c *);
  BX_SMF void FTST(bxInstruction_c *);
  BX_SMF void FXAM(bxInstruction_c *);
+  BX_SMF void FDECSTP(bxInstruction_c *);
+  BX_SMF void FINCSTP(bxInstruction_c *);
+  BX_SMF void FFREE_STi(bxInstruction_c *);

  BX_SMF void F2XM1(bxInstruction_c *);
  BX_SMF void FYL2X(bxInstruction_c *);
@ -2019,8 +1998,6 @@ union {
  BX_SMF void FPATAN(bxInstruction_c *);
  BX_SMF void FXTRACT(bxInstruction_c *);
  BX_SMF void FPREM1(bxInstruction_c *);
-  BX_SMF void FDECSTP(bxInstruction_c *);
-  BX_SMF void FINCSTP(bxInstruction_c *);
  BX_SMF void FPREM(bxInstruction_c *);
  BX_SMF void FYL2XP1(bxInstruction_c *);
  BX_SMF void FSQRT(bxInstruction_c *);
@ -2030,9 +2007,6 @@ union {
  BX_SMF void FSCALE(bxInstruction_c *);
  BX_SMF void FSIN(bxInstruction_c *);
  BX_SMF void FCOS(bxInstruction_c *);
-  BX_SMF void FNCLEX(bxInstruction_c *);
-  BX_SMF void FNINIT(bxInstruction_c *);
-  BX_SMF void FFREE_STi(bxInstruction_c *);
 #endif

  /* MMX */
@ -2096,9 +2070,12 @@ union {
  /* MMX */

 #if BX_SUPPORT_FPU
-  BX_SMF void prepareFPU(void);
-  BX_SMF void FPU_check_pending_exceptions(void);
  BX_SMF void print_state_FPU(void);
+  BX_SMF void prepareFPU(bxInstruction_c *i, bx_bool = 1, bx_bool = 1);
+  BX_SMF void FPU_check_pending_exceptions(void);
+  BX_SMF void FPU_stack_underflow(int stnr, int pop_stack = 0);
+  BX_SMF void FPU_stack_overflow(void);
+  BX_SMF int  FPU_exception(int exception);
 #endif

 #if BX_SUPPORT_MMX || BX_SUPPORT_SSE
@ -2343,8 +2320,8 @@ union {
  /* PNI */

 #if BX_SUPPORT_FPU
-  BX_SMF void fpu_execute(bxInstruction_c *i);
-  BX_SMF void fpu_init(void);
+  BX_SMF int  fpu_save_environment(bxInstruction_c *i);
+  BX_SMF int  fpu_load_environment(bxInstruction_c *i);
 #endif

  BX_SMF void CMPXCHG_XBTS(bxInstruction_c *);
@ -2772,7 +2749,8 @@ union {
  // revalidate_prefetch_q is now a no-op, due to the newer EIP window
  // technique.
  BX_SMF BX_CPP_INLINE void revalidate_prefetch_q(void) { }
-  BX_SMF BX_CPP_INLINE void invalidate_prefetch_q(void) {
+  BX_SMF BX_CPP_INLINE void invalidate_prefetch_q(void) 
+    {
    BX_CPU_THIS_PTR eipPageWindowSize = 0;
    }

@ -2784,12 +2762,18 @@ union {
  BX_SMF void write_virtual_qword(unsigned seg, bx_address offset, Bit64u *data) BX_CPP_AttrRegparmN(3);
  BX_SMF void write_virtual_dqword(unsigned s, bx_address off, Bit8u *data) BX_CPP_AttrRegparmN(3);
  BX_SMF void write_virtual_dqword_aligned(unsigned s, bx_address off, Bit8u *data) BX_CPP_AttrRegparmN(3);
+#if BX_SUPPORT_FPU
+  BX_SMF void write_virtual_tword(unsigned seg, bx_address offset, floatx80 *data) BX_CPP_AttrRegparmN(3);
+#endif
  BX_SMF void read_virtual_byte(unsigned seg, bx_address offset, Bit8u *data) BX_CPP_AttrRegparmN(3);
  BX_SMF void read_virtual_word(unsigned seg, bx_address offset, Bit16u *data) BX_CPP_AttrRegparmN(3);
  BX_SMF void read_virtual_dword(unsigned seg, bx_address offset, Bit32u *data) BX_CPP_AttrRegparmN(3);
  BX_SMF void read_virtual_qword(unsigned seg, bx_address offset, Bit64u *data) BX_CPP_AttrRegparmN(3);
  BX_SMF void read_virtual_dqword(unsigned s, bx_address off, Bit8u *data) BX_CPP_AttrRegparmN(3);
  BX_SMF void read_virtual_dqword_aligned(unsigned s, bx_address off, Bit8u *data) BX_CPP_AttrRegparmN(3);
+#if BX_SUPPORT_FPU
+  BX_SMF void read_virtual_tword(unsigned seg, bx_address offset, floatx80 *data) BX_CPP_AttrRegparmN(3);
+#endif

 #define readVirtualDQword(s, off, data) read_virtual_dqword(s, off, data)
 #define readVirtualDQwordAligned(s, off, data) read_virtual_dqword_aligned(s, off, data)
@ -2858,6 +2842,9 @@ union {
  BX_SMF void write_eflags(Bit32u eflags, bx_bool change_IOPL, bx_bool change_IF,
                    bx_bool change_VM, bx_bool change_RF);
  BX_SMF void writeEFlags(Bit32u eflags, Bit32u changeMask) BX_CPP_AttrRegparmN(2); // Newer variant.
+#if BX_SUPPORT_FPU || BX_SUPPORT_SSE >= 1
+  BX_SMF void write_eflags_fpu_compare(int float_relation);
+#endif
  BX_SMF Bit16u read_flags(void);
  BX_SMF Bit32u read_eflags(void);
  BX_SMF Bit32u get_segment_base(unsigned seg);
--- a/bochs/cpu/data_xfer16.cc
+++ b/bochs/cpu/data_xfer16.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: data_xfer16.cc,v 1.29 2004-05-10 21:05:48 sshwarts Exp $
+// $Id: data_xfer16.cc,v 1.30 2004-06-18 14:11:06 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -316,7 +316,7 @@ BX_CPU_C::CMOV_GwEw(bxInstruction_c *i)
  //       of whether condition is true or not.  Thus, exceptions may
  //       occur even if the MOV does not take place.

-  bx_bool condition;
+  bx_bool condition = 0;
  Bit16u op2_16;

  switch (i->b1()) {
@ -338,7 +338,6 @@ BX_CPU_C::CMOV_GwEw(bxInstruction_c *i)
    case 0x14E: condition = get_ZF() || (getB_SF() != getB_OF()); break;
    case 0x14F: condition = !get_ZF() && (getB_SF() == getB_OF()); break;
    default:
-      condition = 0;
      BX_PANIC(("CMOV_GwEw: default case"));
    }

--- a/bochs/cpu/data_xfer32.cc
+++ b/bochs/cpu/data_xfer32.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: data_xfer32.cc,v 1.27 2004-05-10 21:05:48 sshwarts Exp $
+// $Id: data_xfer32.cc,v 1.28 2004-06-18 14:11:06 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -257,7 +257,7 @@ BX_CPU_C::CMOV_GdEd(bxInstruction_c *i)
  //       of whether condition is true or not.  Thus, exceptions may
  //       occur even if the MOV does not take place.

-  bx_bool condition;
+  bx_bool condition = 0;
  Bit32u op2_32;

  switch (i->b1()) {
@ -279,7 +279,6 @@ BX_CPU_C::CMOV_GdEd(bxInstruction_c *i)
    case 0x14E: condition = get_ZF() || (getB_SF() != getB_OF()); break;
    case 0x14F: condition = !get_ZF() && (getB_SF() == getB_OF()); break;
    default:
-      condition = 0;
      BX_PANIC(("CMOV_GdEd: default case"));
    }

--- a/bochs/cpu/data_xfer64.cc
+++ b/bochs/cpu/data_xfer64.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: data_xfer64.cc,v 1.18 2004-05-10 21:05:48 sshwarts Exp $
+// $Id: data_xfer64.cc,v 1.19 2004-06-18 14:11:06 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -362,7 +362,7 @@ BX_CPU_C::CMOV_GqEq(bxInstruction_c *i)
  //       of whether condition is true or not.  Thus, exceptions may
  //       occur even if the MOV does not take place.

-  bx_bool condition;
+  bx_bool condition = 0;
  Bit64u op2_64;

  switch (i->b1()) {
@ -384,7 +384,6 @@ BX_CPU_C::CMOV_GqEq(bxInstruction_c *i)
    case 0x14E: condition = get_ZF() || (getB_SF() != getB_OF()); break;
    case 0x14F: condition = !get_ZF() && (getB_SF() == getB_OF()); break;
    default:
-      condition = 0;
      BX_PANIC(("CMOV_GqEq: default case"));
    }

--- a/bochs/cpu/fetchdecode.h
+++ b/bochs/cpu/fetchdecode.h
@ -67,7 +67,7 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupD8[8] = {
  /* 0 */  { 0,  &BX_CPU_C::FADD_SINGLE_REAL  },
  /* 1 */  { 0,  &BX_CPU_C::FMUL_SINGLE_REAL  },
  /* 2 */  { 0,  &BX_CPU_C::FCOM_SINGLE_REAL  },
-  /* 3 */  { 0,  &BX_CPU_C::FCOMP_SINGLE_REAL },
+  /* 3 */  { 0,  &BX_CPU_C::FCOM_SINGLE_REAL  },	// FCOMP_SINGLE_REAL
  /* 4 */  { 0,  &BX_CPU_C::FSUB_SINGLE_REAL  },
  /* 5 */  { 0,  &BX_CPU_C::FSUBR_SINGLE_REAL },
  /* 6 */  { 0,  &BX_CPU_C::FDIV_SINGLE_REAL  },
@ -76,14 +76,14 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupD8[8] = {

  // D9 (modrm is outside 00h - BFh) (mod != 11)
 static BxOpcodeInfo_t BxOpcodeInfo_FPGroupD9[8] = { 
-  /* 0 */  { 0,  &BX_CPU_C::FLD_SINGLE_REAL   },
-  /* 1 */  { 0,  &BX_CPU_C::BxError           },
-  /* 2 */  { 0,  &BX_CPU_C::FST_SINGLE_REAL   },
-  /* 3 */  { 0,  &BX_CPU_C::FSTP_SINGLE_REAL  },
-  /* 4 */  { 0,  &BX_CPU_C::FLDENV            },
-  /* 5 */  { 0,  &BX_CPU_C::FLDCW             },
-  /* 6 */  { 0,  &BX_CPU_C::FNSTENV           },
-  /* 7 */  { 0,  &BX_CPU_C::FNSTCW            }
+  /* 0 */  { 0,  &BX_CPU_C::FLD_SINGLE_REAL  },
+  /* 1 */  { 0,  &BX_CPU_C::BxError          },
+  /* 2 */  { 0,  &BX_CPU_C::FST_SINGLE_REAL  },
+  /* 3 */  { 0,  &BX_CPU_C::FST_SINGLE_REAL  },	 	// FSTP_SINGLE_REAL
+  /* 4 */  { 0,  &BX_CPU_C::FLDENV           },
+  /* 5 */  { 0,  &BX_CPU_C::FLDCW            },
+  /* 6 */  { 0,  &BX_CPU_C::FNSTENV          },
+  /* 7 */  { 0,  &BX_CPU_C::FNSTCW           }
  };

  // DA (modrm is outside 00h - BFh) (mod != 11)
@ -91,7 +91,7 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupDA[8] = {
  /* 0 */  { 0,  &BX_CPU_C::FIADD_DWORD_INTEGER  },
  /* 1 */  { 0,  &BX_CPU_C::FIMUL_DWORD_INTEGER  },
  /* 2 */  { 0,  &BX_CPU_C::FICOM_DWORD_INTEGER  },
-  /* 3 */  { 0,  &BX_CPU_C::FICOMP_DWORD_INTEGER },
+  /* 3 */  { 0,  &BX_CPU_C::FICOM_DWORD_INTEGER  },	// FICOMP_DWORD_INTEGER
  /* 4 */  { 0,  &BX_CPU_C::FISUB_DWORD_INTEGER  },
  /* 5 */  { 0,  &BX_CPU_C::FISUBR_DWORD_INTEGER },
  /* 6 */  { 0,  &BX_CPU_C::FIDIV_DWORD_INTEGER  },
@ -103,7 +103,7 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupDB[8] = {
  /* 0 */  { 0,  &BX_CPU_C::FILD_DWORD_INTEGER  },
  /* 1 */  { 0,  &BX_CPU_C::FISTTP64            },
  /* 2 */  { 0,  &BX_CPU_C::FIST_DWORD_INTEGER  },
-  /* 3 */  { 0,  &BX_CPU_C::FISTP_DWORD_INTEGER },
+  /* 3 */  { 0,  &BX_CPU_C::FIST_DWORD_INTEGER  },	// FISTP
  /* 4 */  { 0,  &BX_CPU_C::BxError             },
  /* 5 */  { 0,  &BX_CPU_C::FLD_EXTENDED_REAL   },
  /* 6 */  { 0,  &BX_CPU_C::BxError             },
@ -115,7 +115,7 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupDC[8] = {
  /* 0 */  { 0,  &BX_CPU_C::FADD_DOUBLE_REAL  },
  /* 1 */  { 0,  &BX_CPU_C::FMUL_DOUBLE_REAL  },
  /* 2 */  { 0,  &BX_CPU_C::FCOM_DOUBLE_REAL  },
-  /* 3 */  { 0,  &BX_CPU_C::FCOMP_DOUBLE_REAL },
+  /* 3 */  { 0,  &BX_CPU_C::FCOM_DOUBLE_REAL  },	// FCOMP_DOUBLE_REAL
  /* 4 */  { 0,  &BX_CPU_C::FSUB_DOUBLE_REAL  },
  /* 5 */  { 0,  &BX_CPU_C::FSUBR_DOUBLE_REAL },
  /* 6 */  { 0,  &BX_CPU_C::FDIV_DOUBLE_REAL  },
@ -124,14 +124,14 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupDC[8] = {

  // DD (modrm is outside 00h - BFh) (mod != 11)
 static BxOpcodeInfo_t BxOpcodeInfo_FPGroupDD[8] = { 
-  /* 0 */  { 0,  &BX_CPU_C::FLD_DOUBLE_REAL  },
-  /* 1 */  { 0,  &BX_CPU_C::FISTTP32         },
-  /* 2 */  { 0,  &BX_CPU_C::FST_DOUBLE_REAL  },
-  /* 3 */  { 0,  &BX_CPU_C::FSTP_DOUBLE_REAL },
-  /* 4 */  { 0,  &BX_CPU_C::FRSTOR           },
-  /* 5 */  { 0,  &BX_CPU_C::BxError          },
-  /* 6 */  { 0,  &BX_CPU_C::FNSAVE           },
-  /* 7 */  { 0,  &BX_CPU_C::FNSTSW           }
+  /* 0 */  { 0,  &BX_CPU_C::FLD_DOUBLE_REAL },
+  /* 1 */  { 0,  &BX_CPU_C::FISTTP32        },
+  /* 2 */  { 0,  &BX_CPU_C::FST_DOUBLE_REAL },
+  /* 3 */  { 0,  &BX_CPU_C::FST_DOUBLE_REAL },		// FSTP_DOUBLE_REAL
+  /* 4 */  { 0,  &BX_CPU_C::FRSTOR          },
+  /* 5 */  { 0,  &BX_CPU_C::BxError         },
+  /* 6 */  { 0,  &BX_CPU_C::FNSAVE          },
+  /* 7 */  { 0,  &BX_CPU_C::FNSTSW          }
  };

  // DE (modrm is outside 00h - BFh) (mod != 11)
@ -139,7 +139,7 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupDE[8] = {
  /* 0 */  { 0,  &BX_CPU_C::FIADD_WORD_INTEGER  },
  /* 1 */  { 0,  &BX_CPU_C::FIMUL_WORD_INTEGER  },
  /* 2 */  { 0,  &BX_CPU_C::FICOM_WORD_INTEGER  },
-  /* 3 */  { 0,  &BX_CPU_C::FICOMP_WORD_INTEGER },
+  /* 3 */  { 0,  &BX_CPU_C::FICOM_WORD_INTEGER  },	// FISTP_WORD_INTEGER
  /* 4 */  { 0,  &BX_CPU_C::FISUB_WORD_INTEGER  },
  /* 5 */  { 0,  &BX_CPU_C::FISUBR_WORD_INTEGER },
  /* 6 */  { 0,  &BX_CPU_C::FIDIV_WORD_INTEGER  },
@ -151,7 +151,7 @@ static BxOpcodeInfo_t BxOpcodeInfo_FPGroupDF[8] = {
  /* 0 */  { 0,  &BX_CPU_C::FILD_WORD_INTEGER   },
  /* 1 */  { 0,  &BX_CPU_C::FISTTP16            },
  /* 2 */  { 0,  &BX_CPU_C::FIST_WORD_INTEGER   },
-  /* 3 */  { 0,  &BX_CPU_C::FISTP_WORD_INTEGER  },
+  /* 3 */  { 0,  &BX_CPU_C::FIST_WORD_INTEGER  	},	// FISTP
  /* 4 */  { 0,  &BX_CPU_C::FBLD_PACKED_BCD     },
  /* 5 */  { 0,  &BX_CPU_C::FILD_QWORD_INTEGER  },
  /* 6 */  { 0,  &BX_CPU_C::FBSTP_PACKED_BCD    },
@ -185,14 +185,14 @@ static BxOpcodeInfo_t BxOpcodeInfo_FloatingPoint[512] = {
  /* D8 D5 */  { 0,  &BX_CPU_C::FCOM_STi      },
  /* D8 D6 */  { 0,  &BX_CPU_C::FCOM_STi      },
  /* D8 D7 */  { 0,  &BX_CPU_C::FCOM_STi      },
-  /* D8 D8 */  { 0,  &BX_CPU_C::FCOMP_STi     },
-  /* D8 D9 */  { 0,  &BX_CPU_C::FCOMP_STi     },
-  /* D8 DA */  { 0,  &BX_CPU_C::FCOMP_STi     },
-  /* D8 DB */  { 0,  &BX_CPU_C::FCOMP_STi     },
-  /* D8 DC */  { 0,  &BX_CPU_C::FCOMP_STi     },
-  /* D8 DD */  { 0,  &BX_CPU_C::FCOMP_STi     },
-  /* D8 DE */  { 0,  &BX_CPU_C::FCOMP_STi     },
-  /* D8 DF */  { 0,  &BX_CPU_C::FCOMP_STi     },
+  /* D8 D8 */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
+  /* D8 D9 */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
+  /* D8 DA */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
+  /* D8 DB */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
+  /* D8 DC */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
+  /* D8 DD */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
+  /* D8 DE */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
+  /* D8 DF */  { 0,  &BX_CPU_C::FCOM_STi      },	// FCOMP
  /* D8 E0 */  { 0,  &BX_CPU_C::FSUB_ST0_STj  },
  /* D8 E1 */  { 0,  &BX_CPU_C::FSUB_ST0_STj  },
  /* D8 E2 */  { 0,  &BX_CPU_C::FSUB_ST0_STj  },
@ -293,38 +293,38 @@ static BxOpcodeInfo_t BxOpcodeInfo_FloatingPoint[512] = {
  /* D9 FF */  { 0,  &BX_CPU_C::FCOS     },
  
  // DA (modrm is outside 00h - BFh) (mod == 11)
-  /* DA C0 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C1 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C2 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C3 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C4 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C5 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C6 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C7 */  { 0,  &BX_CPU_C::FCMOVB_ST0_STj  },
-  /* DA C8 */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA C9 */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA CA */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA CB */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA CC */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA CD */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA CE */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA CF */  { 0,  &BX_CPU_C::FCMOVE_ST0_STj  },
-  /* DA D0 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D1 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D2 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D3 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D4 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D5 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D6 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D7 */  { 0,  &BX_CPU_C::FCMOVBE_ST0_STj },
-  /* DA D8 */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
-  /* DA D9 */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
-  /* DA DA */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
-  /* DA DB */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
-  /* DA DC */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
-  /* DA DD */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
-  /* DA DE */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
-  /* DA DF */  { 0,  &BX_CPU_C::FCMOVU_ST0_STj  },
+  /* DA C0 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C1 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C2 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C3 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C4 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C5 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C6 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C7 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C8 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA C9 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA CA */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA CB */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA CC */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA CD */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA CE */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA CF */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D0 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D1 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D2 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D3 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D4 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D5 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D6 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D7 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D8 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA D9 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA DA */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA DB */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA DC */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA DD */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA DE */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
+  /* DA DF */  { 0,  &BX_CPU_C::FCMOV_ST0_STj },
  /* DA E0 */  { 0,  &BX_CPU_C::BxError },
  /* DA E1 */  { 0,  &BX_CPU_C::BxError },
  /* DA E2 */  { 0,  &BX_CPU_C::BxError },
@ -359,62 +359,62 @@ static BxOpcodeInfo_t BxOpcodeInfo_FloatingPoint[512] = {
  /* DA FF */  { 0,  &BX_CPU_C::BxError },

  // DB (modrm is outside 00h - BFh) (mod == 11)
-  /* DB C0 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C1 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C2 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C3 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C4 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C5 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C6 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C7 */  { 0,  &BX_CPU_C::FCMOVNB_ST0_STj  },
-  /* DB C8 */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB C9 */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB CA */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB CB */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB CC */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB CD */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB CE */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB CF */  { 0,  &BX_CPU_C::FCMOVNE_ST0_STj  },
-  /* DB D0 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D1 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D2 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D3 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D4 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D5 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D6 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D7 */  { 0,  &BX_CPU_C::FCMOVNBE_ST0_STj },
-  /* DB D8 */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB D9 */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB DA */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB DB */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB DC */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB DD */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB DE */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB DF */  { 0,  &BX_CPU_C::FCMOVNU_ST0_STj  },
-  /* DB E0 */  { 0,  &BX_CPU_C::FNOP             },     // feni  (287 only)
-  /* DB E1 */  { 0,  &BX_CPU_C::FNOP             },     // fdisi (287 only)
-  /* DB E2 */  { 0,  &BX_CPU_C::FNCLEX           },
-  /* DB E3 */  { 0,  &BX_CPU_C::FNINIT           },
-  /* DB E4 */  { 0,  &BX_CPU_C::FNOP             },     // fsetpm (287 only)
-  /* DB E5 */  { 0,  &BX_CPU_C::BxError          },
-  /* DB E6 */  { 0,  &BX_CPU_C::BxError          },
-  /* DB E7 */  { 0,  &BX_CPU_C::BxError          },
-  /* DB E8 */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB E9 */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB EA */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB EB */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB EC */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB ED */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB EE */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB EF */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj   },
-  /* DB F0 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
-  /* DB F1 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
-  /* DB F2 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
-  /* DB F3 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
-  /* DB F4 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
-  /* DB F5 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
-  /* DB F6 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
-  /* DB F7 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj    },
+  /* DB C0 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C1 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C2 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C3 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C4 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C5 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C6 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C7 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C8 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB C9 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB CA */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB CB */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB CC */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB CD */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB CE */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB CF */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D0 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D1 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D2 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D3 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D4 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D5 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D6 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D7 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D8 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB D9 */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB DA */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB DB */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB DC */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB DD */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB DE */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB DF */  { 0,  &BX_CPU_C::FCMOV_ST0_STj  },
+  /* DB E0 */  { 0,  &BX_CPU_C::FPLEGACY       },     // feni  (287 only)
+  /* DB E1 */  { 0,  &BX_CPU_C::FPLEGACY       },     // fdisi (287 only)
+  /* DB E2 */  { 0,  &BX_CPU_C::FNCLEX         },
+  /* DB E3 */  { 0,  &BX_CPU_C::FNINIT         },
+  /* DB E4 */  { 0,  &BX_CPU_C::FPLEGACY       },     // fsetpm (287 only)
+  /* DB E5 */  { 0,  &BX_CPU_C::BxError        },
+  /* DB E6 */  { 0,  &BX_CPU_C::BxError        },
+  /* DB E7 */  { 0,  &BX_CPU_C::BxError        },
+  /* DB E8 */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB E9 */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB EA */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB EB */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB EC */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB ED */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB EE */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB EF */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },
+  /* DB F0 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
+  /* DB F1 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
+  /* DB F2 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
+  /* DB F3 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
+  /* DB F4 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
+  /* DB F5 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
+  /* DB F6 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
+  /* DB F7 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },
  /* DB F8 */  { 0,  &BX_CPU_C::BxError },
  /* DB F9 */  { 0,  &BX_CPU_C::BxError },
  /* DB FA */  { 0,  &BX_CPU_C::BxError },
@ -491,54 +491,54 @@ static BxOpcodeInfo_t BxOpcodeInfo_FloatingPoint[512] = {
  /* DC FF */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },

  // DD (modrm is outside 00h - BFh) (mod == 11)
-  /* DD C0 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C1 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C2 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C3 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C4 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C5 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C6 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C7 */  { 0,  &BX_CPU_C::FFREE_STi  },
-  /* DD C8 */  { 0,  &BX_CPU_C::BxError    },
-  /* DD C9 */  { 0,  &BX_CPU_C::BxError    },
-  /* DD CA */  { 0,  &BX_CPU_C::BxError    },
-  /* DD CB */  { 0,  &BX_CPU_C::BxError    },
-  /* DD CC */  { 0,  &BX_CPU_C::BxError    },
-  /* DD CD */  { 0,  &BX_CPU_C::BxError    },
-  /* DD CE */  { 0,  &BX_CPU_C::BxError    },
-  /* DD CF */  { 0,  &BX_CPU_C::BxError    },
-  /* DD D0 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D1 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D2 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D3 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D4 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D5 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D6 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D7 */  { 0,  &BX_CPU_C::FST_STi    },
-  /* DD D8 */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD D9 */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD DA */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD DB */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD DC */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD DD */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD DE */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD DF */  { 0,  &BX_CPU_C::FSTP_STi   },
-  /* DD E0 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E1 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E2 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E3 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E4 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E5 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E6 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E7 */  { 0,  &BX_CPU_C::FUCOM_STi  },
-  /* DD E8 */  { 0,  &BX_CPU_C::FUCOMP_STi },
-  /* DD E9 */  { 0,  &BX_CPU_C::FUCOMP_STi },
-  /* DD EA */  { 0,  &BX_CPU_C::FUCOMP_STi },
-  /* DD EB */  { 0,  &BX_CPU_C::FUCOMP_STi },
-  /* DD EC */  { 0,  &BX_CPU_C::FUCOMP_STi },
-  /* DD ED */  { 0,  &BX_CPU_C::FUCOMP_STi },
-  /* DD EE */  { 0,  &BX_CPU_C::FUCOMP_STi },
-  /* DD EF */  { 0,  &BX_CPU_C::FUCOMP_STi },
+  /* DD C0 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C1 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C2 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C3 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C4 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C5 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C6 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C7 */  { 0,  &BX_CPU_C::FFREE_STi },
+  /* DD C8 */  { 0,  &BX_CPU_C::BxError   },
+  /* DD C9 */  { 0,  &BX_CPU_C::BxError   },
+  /* DD CA */  { 0,  &BX_CPU_C::BxError   },
+  /* DD CB */  { 0,  &BX_CPU_C::BxError   },
+  /* DD CC */  { 0,  &BX_CPU_C::BxError   },
+  /* DD CD */  { 0,  &BX_CPU_C::BxError   },
+  /* DD CE */  { 0,  &BX_CPU_C::BxError   },
+  /* DD CF */  { 0,  &BX_CPU_C::BxError   },
+  /* DD D0 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D1 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D2 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D3 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D4 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D5 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D6 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D7 */  { 0,  &BX_CPU_C::FST_STi   },
+  /* DD D8 */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD D9 */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD DA */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD DB */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD DC */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD DD */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD DE */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD DF */  { 0,  &BX_CPU_C::FST_STi   },	// FSTP_STi
+  /* DD E0 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E1 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E2 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E3 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E4 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E5 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E6 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E7 */  { 0,  &BX_CPU_C::FUCOM_STi },
+  /* DD E8 */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
+  /* DD E9 */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
+  /* DD EA */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
+  /* DD EB */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
+  /* DD EC */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
+  /* DD ED */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
+  /* DD EE */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
+  /* DD EF */  { 0,  &BX_CPU_C::FUCOM_STi },	// FUCOMP
  /* DD F0 */  { 0,  &BX_CPU_C::BxError },
  /* DD F1 */  { 0,  &BX_CPU_C::BxError },
  /* DD F2 */  { 0,  &BX_CPU_C::BxError },
@ -557,22 +557,23 @@ static BxOpcodeInfo_t BxOpcodeInfo_FloatingPoint[512] = {
  /* DD FF */  { 0,  &BX_CPU_C::BxError },

  // DE (modrm is outside 00h - BFh) (mod == 11)
-  /* DE C0 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C1 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C2 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C3 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C4 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C5 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C6 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C7 */  { 0,  &BX_CPU_C::FADDP_STi_ST0  },
-  /* DE C8 */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
-  /* DE C9 */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
-  /* DE CA */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
-  /* DE CB */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
-  /* DE CC */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
-  /* DE CD */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
-  /* DE CE */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
-  /* DE CF */  { 0,  &BX_CPU_C::FMULP_STi_ST0  },
+  // all instructions pop FPU stack
+  /* DE C0 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C1 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C2 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C3 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C4 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C5 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C6 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C7 */  { 0,  &BX_CPU_C::FADD_STi_ST0  },
+  /* DE C8 */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
+  /* DE C9 */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
+  /* DE CA */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
+  /* DE CB */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
+  /* DE CC */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
+  /* DE CD */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
+  /* DE CE */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
+  /* DE CF */  { 0,  &BX_CPU_C::FMUL_STi_ST0  },
  /* DE D0 */  { 0,  &BX_CPU_C::BxError },
  /* DE D1 */  { 0,  &BX_CPU_C::BxError },
  /* DE D2 */  { 0,  &BX_CPU_C::BxError },
@ -589,38 +590,38 @@ static BxOpcodeInfo_t BxOpcodeInfo_FloatingPoint[512] = {
  /* DE DD */  { 0,  &BX_CPU_C::BxError },
  /* DE DE */  { 0,  &BX_CPU_C::BxError },
  /* DE DF */  { 0,  &BX_CPU_C::BxError },
-  /* DE E0 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E1 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E2 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E3 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E4 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E5 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E6 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E7 */  { 0,  &BX_CPU_C::FSUBRP_STi_ST0 },
-  /* DE E8 */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE E9 */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE EA */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE EB */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE EC */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE ED */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE EE */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE EF */  { 0,  &BX_CPU_C::FSUBP_STi_ST0  },
-  /* DE F0 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F1 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F2 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F3 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F4 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F5 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F6 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F7 */  { 0,  &BX_CPU_C::FDIVRP_STi_ST0 },
-  /* DE F8 */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
-  /* DE F9 */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
-  /* DE FA */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
-  /* DE FB */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
-  /* DE FC */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
-  /* DE FD */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
-  /* DE FE */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
-  /* DE FF */  { 0,  &BX_CPU_C::FDIVP_STi_ST0  },
+  /* DE E0 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E1 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E2 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E3 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E4 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E5 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E6 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E7 */  { 0,  &BX_CPU_C::FSUBR_STi_ST0 },
+  /* DE E8 */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE E9 */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE EA */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE EB */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE EC */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE ED */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE EE */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE EF */  { 0,  &BX_CPU_C::FSUB_STi_ST0  },
+  /* DE F0 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F1 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F2 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F3 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F4 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F5 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F6 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F7 */  { 0,  &BX_CPU_C::FDIVR_STi_ST0 },
+  /* DE F8 */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },
+  /* DE F9 */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },
+  /* DE FA */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },
+  /* DE FB */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },
+  /* DE FC */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },
+  /* DE FD */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },
+  /* DE FE */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },
+  /* DE FF */  { 0,  &BX_CPU_C::FDIV_STi_ST0  },

  // DF (modrm is outside 00h - BFh) (mod == 11)
  /* DF C0 */  { 0,  &BX_CPU_C::BxError },
@ -663,22 +664,22 @@ static BxOpcodeInfo_t BxOpcodeInfo_FloatingPoint[512] = {
  /* DF E5 */  { 0,  &BX_CPU_C::BxError },
  /* DF E6 */  { 0,  &BX_CPU_C::BxError },
  /* DF E7 */  { 0,  &BX_CPU_C::BxError },
-  /* DF E8 */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF E9 */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF EA */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF EB */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF EC */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF ED */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF EE */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF EF */  { 0,  &BX_CPU_C::FUCOMIP_ST0_STj },
-  /* DF F0 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
-  /* DF F1 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
-  /* DF F2 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
-  /* DF F3 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
-  /* DF F4 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
-  /* DF F5 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
-  /* DF F6 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
-  /* DF F7 */  { 0,  &BX_CPU_C::FCOMIP_ST0_STj  },
+  /* DF E8 */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF E9 */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF EA */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF EB */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF EC */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF ED */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF EE */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF EF */  { 0,  &BX_CPU_C::FUCOMI_ST0_STj },	// FUCOMIP
+  /* DF F0 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
+  /* DF F1 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
+  /* DF F2 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
+  /* DF F3 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
+  /* DF F4 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
+  /* DF F5 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
+  /* DF F6 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
+  /* DF F7 */  { 0,  &BX_CPU_C::FCOMI_ST0_STj  },	// FCOMIP
  /* DF F8 */  { 0,  &BX_CPU_C::BxError },
  /* DF F9 */  { 0,  &BX_CPU_C::BxError },
  /* DF FA */  { 0,  &BX_CPU_C::BxError },
--- a/bochs/cpu/i387.h
+++ b/bochs/cpu/i387.h
@ -1,6 +1,6 @@
 /////////////////////////////////////////////////////////////////////////
 //
-//   Copyright (c) 2002 Stanislav Shwartsman
+//   Copyright (c) 2004 Stanislav Shwartsman
 //          Written by Stanislav Shwartsman <gate at fidonet.org.il>
 //
 //  This library is free software; you can redistribute it and/or
@ -19,84 +19,163 @@
 //


-
-#ifndef BX_I387_RELATED_EXTENSIONS_H
-#define BX_I387_RELATED_EXTENSIONS_H
+#ifndef _BX_I387_RELATED_EXTENSIONS_H_
+#define _BX_I387_RELATED_EXTENSIONS_H_

 #if BX_SUPPORT_FPU

+#include "fpu/softfloat.h"
+
+#define BX_FPU_REG(index) \
+    (BX_CPU_THIS_PTR the_i387.st_space[index])
+
+#if defined(NEED_CPU_REG_SHORTCUTS)
+#define FPU_PARTIAL_STATUS     (BX_CPU_THIS_PTR the_i387.swd)
+#define FPU_CONTROL_WORD       (BX_CPU_THIS_PTR the_i387.cwd)
+#define FPU_TAG_WORD           (BX_CPU_THIS_PTR the_i387.twd)
+#define FPU_TOS                (BX_CPU_THIS_PTR the_i387.tos)
+#endif
+
+#include "fpu/tag_w.h"
+#include "fpu/status_w.h"
+#include "fpu/control_w.h"
+
+extern int FPU_tagof(const floatx80 &reg);
+
 //
 // Minimal i387 structure
 //
 struct i387_t 
 {
-    Bit32u cwd; 	// control word
-    Bit32u swd; 	// status word
-    Bit32u twd;		// tag word
-    Bit32u fip;
-    Bit32u fcs;
-    Bit32u foo; 	// last instruction opcode
-    Bit32u fos;
+    i387_t() {}
+
+public:
+    void	init();		// used by FINIT/FNINIT instructions
+    void	reset();	// called on CPU reset
+
+    int    	get_tos() const { return tos; }
+
+    int 	is_IA_masked() const { return (cwd & FPU_CW_Invalid); }
+
+    Bit16u 	get_control_word() const { return cwd; }
+    Bit16u 	get_tag_word() const { return twd; }
+    Bit16u 	get_status_word() const { return (swd & ~FPU_SW_Top & 0xFFFF) | ((tos << 11) & FPU_SW_Top); }
+    Bit16u 	get_partial_status() const { return swd; }
+
+    void   	FPU_pop ();
+    void   	FPU_push();
+
+    void   	FPU_settag (int tag, int regnr);
+    int    	FPU_gettag (int regnr);
+
+    void   	FPU_settagi(int tag, int stnr) { FPU_settag(tag, tos+stnr); }
+    int    	FPU_gettagi(int stnr) { return FPU_gettag(tos+stnr); }
+
+    void	FPU_save_reg (floatx80 reg, int tag, int regnr);
+
+    void  	FPU_save_regi(floatx80 reg, int stnr) { FPU_save_regi(reg, FPU_tagof(reg), stnr); }
+    floatx80 	FPU_read_regi(int stnr) { return st_space[(tos+stnr) & 0x07]; }
+    void  	FPU_save_regi(floatx80 reg, int tag, int stnr) { FPU_save_reg(reg, tag, (tos+stnr) & 0x07); }
+
+public:
+    Bit16u cwd; 	// control word
+    Bit16u swd; 	// status word
+    Bit16u twd;		// tag word
+    Bit16u foo; 	// last instruction opcode
+
+    bx_address fip;
+    bx_address fdp;
+    Bit16u fcs;
+    Bit16u fds;
+
+    floatx80 st_space[8];

    unsigned char tos;
-    unsigned char no_update;
-    unsigned char rm;
-    unsigned char align8;
-
-    Bit64u st_space[16]; // 8*16 bytes per FP-reg (aligned) = 128 bytes
+    unsigned char align1;
+    unsigned char align2;
+    unsigned char align3;
 };

-// Endian  Host byte order         Guest (x86) byte order
-// ======================================================
-// Little  FFFFFFFFEEAAAAAA        FFFFFFFFEEAAAAAA
-// Big     AAAAAAEEFFFFFFFF        FFFFFFFFEEAAAAAA
-//
-// Legend: F - fraction/mmx
-//         E - exponent
-//         A - alignment
+#define IS_TAG_EMPTY(i) 		\
+  ((BX_CPU_THIS_PTR the_i387.FPU_gettagi(i)) == FPU_Tag_Empty)

-#ifdef BX_BIG_ENDIAN
-#if defined(__MWERKS__) && defined(macintosh)
-#pragma options align=mac68k
-#endif
-struct bx_fpu_reg_t {
-  Bit16u alignment1, alignment2, alignment3;
-  Bit16s exp;   /* Signed quantity used in internal arithmetic. */
-  Bit32u sigh;
-  Bit32u sigl;
-} GCC_ATTRIBUTE((aligned(16), packed));
-#if defined(__MWERKS__) && defined(macintosh)
-#pragma options align=reset
-#endif
-#else
-struct bx_fpu_reg_t {
-  Bit32u sigl;
-  Bit32u sigh;
-  Bit16s exp;   /* Signed quantity used in internal arithmetic. */
-  Bit16u alignment1, alignment2, alignment3;
-} GCC_ATTRIBUTE((aligned(16), packed));
-#endif
+#define IS_IA_MASKED()			\
+  (BX_CPU_THIS_PTR the_i387.get_control_word() & FPU_CW_Invalid)

-typedef struct bx_fpu_reg_t FPU_REG;
+#define BX_READ_FPU_REG(i)		\
+  (BX_CPU_THIS_PTR the_i387.FPU_read_regi(i))

-#define BX_FPU_REG(index) \
-    (BX_CPU_THIS_PTR the_i387.st_space[index*2])
+#define BX_WRITE_FPU_REGISTER_AND_TAG(value, tag, i)			\
+{                                                               	\
+    BX_CPU_THIS_PTR the_i387.FPU_save_regi((value), (tag), (i));      	\
+}                                                               	

-#define FPU_PARTIAL_STATUS     (BX_CPU_THIS_PTR the_i387.swd)
-#define FPU_CONTROL_WORD       (BX_CPU_THIS_PTR the_i387.cwd)
-#define FPU_TAG_WORD           (BX_CPU_THIS_PTR the_i387.twd)
-#define FPU_TOS                (BX_CPU_THIS_PTR the_i387.tos)
+#define BX_WRITE_FPU_REG(value, i)					\
+{                                                               	\
+    BX_CPU_THIS_PTR the_i387.FPU_save_regi((value), (i));      		\
+}                                                               	

-#define FPU_SW_SUMMARY         (0x0080)		/* exception summary */
-
-#ifdef __cplusplus
-extern "C" 
+BX_CPP_INLINE int i387_t::FPU_gettag(int regnr)
 {
-#endif
-  int FPU_tagof(FPU_REG *reg) BX_CPP_AttrRegparmN(1);
-#ifdef __cplusplus
+  return (get_tag_word() >> ((regnr & 7)*2)) & 3;
 }
-#endif
+
+BX_CPP_INLINE void i387_t::FPU_settag (int tag, int regnr)
+{
+  regnr &= 7;
+  twd &= ~(3 << (regnr*2));
+  twd |= (tag & 3) << (regnr*2);
+}
+
+BX_CPP_INLINE void i387_t::FPU_push(void)
+{
+  tos--;
+}
+
+BX_CPP_INLINE void i387_t::FPU_pop(void)
+{
+  twd |= 3 << ((tos & 7)*2);
+  tos++;
+}
+
+BX_CPP_INLINE void i387_t::FPU_save_reg (floatx80 reg, int tag, int regnr)
+{
+  st_space[regnr] = reg;
+  FPU_settag(tag, regnr);
+}
+
+#include <string.h>
+
+BX_CPP_INLINE void i387_t::init()
+{
+  cwd = 0x037F;
+  swd = 0;
+  tos = 0;
+  twd = 0xFFFF;
+  foo = 0;
+  fip = 0;
+  fcs = 0;
+  fds = 0;
+  fdp = 0;
+}
+
+BX_CPP_INLINE void i387_t::reset()
+{
+  cwd = 0x0040;
+  swd = 0;
+  tos = 0;
+  twd = 0x5555;
+  foo = 0;
+  fip = 0;
+  fcs = 0;
+  fds = 0;
+  fdp = 0;
+
+  memset(st_space, 0, sizeof(floatx80)*8);
+}
+
+extern const floatx80 Const_Z;
+extern const floatx80 Const_1;

 #if BX_SUPPORT_MMX

--- a/bochs/cpu/init.cc
+++ b/bochs/cpu/init.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: init.cc,v 1.51 2004-02-12 00:56:21 cbothamy Exp $
+// $Id: init.cc,v 1.52 2004-06-18 14:11:06 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -168,7 +168,7 @@ cpu_param_handler (bx_param_c *param, int set, Bit64s val)

 void BX_CPU_C::init(BX_MEM_C *addrspace)
 {
-  BX_DEBUG(( "Init $Id: init.cc,v 1.51 2004-02-12 00:56:21 cbothamy Exp $"));
+  BX_DEBUG(( "Init $Id: init.cc,v 1.52 2004-06-18 14:11:06 sshwarts Exp $"));
  // BX_CPU_C constructor
  BX_CPU_THIS_PTR set_INTR (0);
 #if BX_SUPPORT_APIC
@ -866,9 +866,9 @@ BX_CPU_C::reset(unsigned source)
  BX_CPU_THIS_PTR trace_reg = 0;
 #endif

-  // Init the Floating Point Unit
+  // Reset the Floating Point Unit
 #if BX_SUPPORT_FPU
-  fpu_init();
+  BX_CPU_THIS_PTR the_i387.reset();
 #endif

 #if (BX_SMP_PROCESSORS > 1)
--- a/bochs/cpu/paging.cc
+++ b/bochs/cpu/paging.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: paging.cc,v 1.43 2003-12-30 22:12:45 cbothamy Exp $
+// $Id: paging.cc,v 1.44 2004-06-18 14:11:07 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -422,7 +422,12 @@ BX_CPU_C::CR3_change(bx_address value)
  // flush TLB even if value does not change
  TLB_flush(0); // 0 = Don't flush Global entries.
  BX_CPU_THIS_PTR cr3 = value;
-  BX_CPU_THIS_PTR cr3_masked = value & 0xfffff000;
+#if BX_SupportPAE
+  if (BX_CPU_THIS_PTR cr4.get_PAE())
+    BX_CPU_THIS_PTR cr3_masked = value & 0xffffffe0;
+  else
+#endif
+    BX_CPU_THIS_PTR cr3_masked = value & 0xfffff000;
 }

  void
--- a/bochs/cpu/proc_ctrl.cc
+++ b/bochs/cpu/proc_ctrl.cc
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: proc_ctrl.cc,v 1.80 2004-06-03 17:57:29 sshwarts Exp $
+// $Id: proc_ctrl.cc,v 1.81 2004-06-18 14:11:07 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -655,7 +655,7 @@ void BX_CPU_C::MOV_CdRd(bxInstruction_c *i)
  if (protected_mode() && CPL!=0) {
    BX_INFO(("MOV_CdRd: #GP(0) if CPL is not 0"));
    exception(BX_GP_EXCEPTION, 0, 0);
-    }
+  }

  val_32 = BX_READ_32BIT_REG(i->rm());

--- a/bochs/cpu/sse_move.cc
+++ b/bochs/cpu/sse_move.cc
@ -84,18 +84,14 @@ void BX_CPU_C::STMXCSR(bxInstruction_c *i)
 void BX_CPU_C::FXSAVE(bxInstruction_c *i)
 {
 #if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
-  Bit16u twd = BX_CPU_THIS_PTR the_i387.twd, tag_byte = 0;
-  Bit16u status_w = BX_CPU_THIS_PTR the_i387.swd;
-  Bit16u tos = BX_CPU_THIS_PTR the_i387.tos;
+  Bit16u twd = BX_CPU_THIS_PTR the_i387.get_tag_word(), tag_byte = 0;
  unsigned index;
  BxPackedXmmRegister xmm;

  BX_DEBUG(("FXSAVE: save FPU/MMX/SSE state"));

-#define SW_TOP (0x3800)
-
-  xmm.xmm16u(0) = (BX_CPU_THIS_PTR the_i387.cwd);
-  xmm.xmm16u(1) = (status_w & ~SW_TOP & 0xffff) | ((tos << 11) & SW_TOP);
+  xmm.xmm16u(0) = BX_CPU_THIS_PTR the_i387.get_control_word();
+  xmm.xmm16u(1) = BX_CPU_THIS_PTR the_i387.get_status_word ();

  if(twd & 0x0003 != 0x0003) tag_byte |= 0x0100;
  if(twd & 0x000c != 0x000c) tag_byte |= 0x0200;
@ -256,19 +252,14 @@ void BX_CPU_C::FXRSTOR(bxInstruction_c *i)

  tag_byte_mask = 0x0100;

-#define FPU_TAG_VALID   0x00
-#define FPU_TAG_ZERO    0x01
-#define FPU_TAG_SPECIAL 0x02
-#define FPU_TAG_EMPTY   0x03
-
  for(index = 0;index < 8; index++, twd <<= 2, tag_byte_mask <<= 1)
  {
      if(tag_byte & tag_byte_mask) {
-          bx_fpu_reg_t *fpu_reg = (bx_fpu_reg_t *) &(BX_FPU_REG(index));
+          floatx80 &fpu_reg = BX_FPU_REG(index);
          twd = FPU_tagof(fpu_reg);
      }
      else {
-         twd |= FPU_TAG_EMPTY;
+         twd |= FPU_Tag_Empty;
      }
  }

--- a/bochs/cpu/sse_pfp.cc
+++ b/bochs/cpu/sse_pfp.cc
@ -24,8 +24,6 @@

 #if BX_SUPPORT_SSE

-#include "softfloat.h"
-
 void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
 {
  int unmasked = ~(MXCSR.get_exceptions_masks()) & exceptions_flags;
@ -40,15 +38,15 @@ void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
  }
 }

-static void mxcsr_to_softfloat_status_word(softfloat_status_word_t &status, bx_mxcsr_t mxcsr)
+static void mxcsr_to_softfloat_status_word(float_status_t &status, bx_mxcsr_t mxcsr)
 {
-  status.float_detect_tininess = float_tininess_after_rounding;
  status.float_exception_flags = 0; // clear exceptions before execution
  status.float_nan_handling_mode = float_first_operand_nan;
  status.float_rounding_mode = mxcsr.get_rounding_mode();
  // if underflow is masked and FUZ is 1, set it to 1, else to 0
  status.flush_underflow_to_zero = 
       (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
+  status.float_precision_lost_up = 0;
 }

 // handle DAZ
@ -107,7 +105,7 @@ void BX_CPU_C::CVTPI2PS_VpsQq(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  result.xmm32u(0) = int32_to_float32(MMXUD0(op), status_word);
@ -164,7 +162,7 @@ void BX_CPU_C::CVTSI2SD_VsdEd(bxInstruction_c *i)
 #if BX_SUPPORT_SSE >= 2
  BX_CPU_THIS_PTR prepareSSE();

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  Float64 result;

@ -221,7 +219,7 @@ void BX_CPU_C::CVTSI2SS_VssEd(bxInstruction_c *i)
 #if BX_SUPPORT_SSE >= 1
  BX_CPU_THIS_PTR prepareSSE();

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  Float32 result;

@ -290,7 +288,7 @@ void BX_CPU_C::CVTTPS2PI_PqWps(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  Float32 r0 = (Float32)(op & 0xFFFFFFFF);
@ -331,7 +329,7 @@ void BX_CPU_C::CVTTPD2PI_PqWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  MMXUD0(result) = float64_to_int32_round_to_zero(op.xmm64u(0), status_word);
@ -367,7 +365,7 @@ void BX_CPU_C::CVTTSD2SI_GdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

 #if BX_SUPPORT_X86_64 
@ -413,7 +411,7 @@ void BX_CPU_C::CVTTSS2SI_GdWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

 #if BX_SUPPORT_X86_64 
@ -462,7 +460,7 @@ void BX_CPU_C::CVTPS2PI_PqWps(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  Float32 r0 = (Float32)(op & 0xFFFFFFFF);
@ -504,7 +502,7 @@ void BX_CPU_C::CVTPD2PI_PqWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  MMXUD0(result) = float64_to_int32(op.xmm64u(0), status_word);
@ -541,7 +539,7 @@ void BX_CPU_C::CVTSD2SI_GdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

 #if BX_SUPPORT_X86_64 
@ -588,7 +586,7 @@ void BX_CPU_C::CVTSS2SI_GdWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

 #if BX_SUPPORT_X86_64 
@ -634,7 +632,7 @@ void BX_CPU_C::CVTPS2PD_VpsWps(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  Float32 r0 = (Float32)(op & 0xFFFFFFFF);
@ -682,7 +680,7 @@ void BX_CPU_C::CVTPD2PS_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -730,7 +728,7 @@ void BX_CPU_C::CVTSD2SS_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  if (MXCSR.get_DAZ()) op = handleDAZ(op);
  result = float64_to_float32(op, status_word);
@ -765,7 +763,7 @@ void BX_CPU_C::CVTSS2SD_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  if (MXCSR.get_DAZ()) op = handleDAZ(op);
  result = float32_to_float64(op, status_word);
@ -801,7 +799,7 @@ void BX_CPU_C::CVTDQ2PS_VpsWdq(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  result.xmm32u(0) = 
@ -844,7 +842,7 @@ void BX_CPU_C::CVTPS2DQ_VdqWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  result.xmm32u(0) = 
@ -886,7 +884,7 @@ void BX_CPU_C::CVTTPS2DQ_VdqWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  result.xmm32u(0) = 
@ -928,7 +926,7 @@ void BX_CPU_C::CVTTPD2DQ_VqWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  result.xmm32u(0) = 
@ -968,7 +966,7 @@ void BX_CPU_C::CVTPD2DQ_VqWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  result.xmm32u(0) = 
@ -1041,7 +1039,7 @@ void BX_CPU_C::UCOMISS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1052,25 +1050,7 @@ void BX_CPU_C::UCOMISS_VssWss(bxInstruction_c *i)

  int rc = float32_compare_quiet(op1, op2, status_word);
  BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
-
-  switch(rc) {
-     case float_relation_unordered:
-         setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
-         break;
-
-     case float_relation_greater:
-         setEFlagsOSZAPC(0);
-         break;
-
-     case float_relation_less:
-         setEFlagsOSZAPC(EFlagsCFMask);
-         break;
-
-     case float_relation_equal:
-         setEFlagsOSZAPC(EFlagsZFMask);
-         break;
-  }
-
+  BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
 #else
  BX_INFO(("UCOMISS_VssWss: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
@ -1098,7 +1078,7 @@ void BX_CPU_C::UCOMISD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1109,25 +1089,7 @@ void BX_CPU_C::UCOMISD_VsdWsd(bxInstruction_c *i)

  int rc = float64_compare_quiet(op1, op2, status_word);
  BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
-
-  switch(rc) {
-     case float_relation_unordered:
-         setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
-         break;
-
-     case float_relation_greater:
-         setEFlagsOSZAPC(0);
-         break;
-
-     case float_relation_less:
-         setEFlagsOSZAPC(EFlagsCFMask);
-         break;
-
-     case float_relation_equal:
-         setEFlagsOSZAPC(EFlagsZFMask);
-         break;
-  }
-
+  BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
 #else
  BX_INFO(("UCOMISD_VsdWsd: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
@ -1155,7 +1117,7 @@ void BX_CPU_C::COMISS_VpsWps(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1166,25 +1128,7 @@ void BX_CPU_C::COMISS_VpsWps(bxInstruction_c *i)

  int rc = float32_compare(op1, op2, status_word);
  BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
-
-  switch(rc) {
-     case float_relation_unordered:
-         setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
-         break;
-
-     case float_relation_greater:
-         setEFlagsOSZAPC(0);
-         break;
-
-     case float_relation_less:
-         setEFlagsOSZAPC(EFlagsCFMask);
-         break;
-
-     case float_relation_equal:
-         setEFlagsOSZAPC(EFlagsZFMask);
-         break;
-  }
-
+  BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
 #else
  BX_INFO(("COMISS_VpsWps: required SSE, use --enable-sse option"));
  UndefinedOpcode(i);
@ -1212,7 +1156,7 @@ void BX_CPU_C::COMISD_VpdWpd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1223,25 +1167,7 @@ void BX_CPU_C::COMISD_VpdWpd(bxInstruction_c *i)

  int rc = float64_compare(op1, op2, status_word);
  BX_CPU_THIS_PTR check_exceptionsSSE(status_word.float_exception_flags);
-
-  switch(rc) {
-     case float_relation_unordered:
-         setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
-         break;
-
-     case float_relation_greater:
-         setEFlagsOSZAPC(0);
-         break;
-
-     case float_relation_less:
-         setEFlagsOSZAPC(EFlagsCFMask);
-         break;
-
-     case float_relation_equal:
-         setEFlagsOSZAPC(EFlagsZFMask);
-         break;
-  }
-
+  BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
 #else
  BX_INFO(("COMISD_VpdWpd: required SSE2, use --enable-sse option"));
  UndefinedOpcode(i);
@ -1269,7 +1195,7 @@ void BX_CPU_C::SQRTPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1319,7 +1245,7 @@ void BX_CPU_C::SQRTPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1363,7 +1289,7 @@ void BX_CPU_C::SQRTSD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  if (MXCSR.get_DAZ()) op = handleDAZ(op);
  result = float64_sqrt(op, status_word);
@ -1397,7 +1323,7 @@ void BX_CPU_C::SQRTSS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  if (MXCSR.get_DAZ()) op = handleDAZ(op);
  result = float32_sqrt(op, status_word);
@ -1431,7 +1357,7 @@ void BX_CPU_C::ADDPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) {
@ -1485,7 +1411,7 @@ void BX_CPU_C::ADDPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1532,7 +1458,7 @@ void BX_CPU_C::ADDSD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1572,7 +1498,7 @@ void BX_CPU_C::ADDSS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1612,7 +1538,7 @@ void BX_CPU_C::MULPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) {
@ -1666,7 +1592,7 @@ void BX_CPU_C::MULPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1713,7 +1639,7 @@ void BX_CPU_C::MULSD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1753,7 +1679,7 @@ void BX_CPU_C::MULSS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1793,7 +1719,7 @@ void BX_CPU_C::SUBPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) {
@ -1847,7 +1773,7 @@ void BX_CPU_C::SUBPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1894,7 +1820,7 @@ void BX_CPU_C::SUBSD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1934,7 +1860,7 @@ void BX_CPU_C::SUBSS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -1974,7 +1900,7 @@ void BX_CPU_C::MINPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  int rc;

@ -2033,7 +1959,7 @@ void BX_CPU_C::MINPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  int rc;

@ -2083,7 +2009,7 @@ void BX_CPU_C::MINSD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2124,7 +2050,7 @@ void BX_CPU_C::MINSS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2165,7 +2091,7 @@ void BX_CPU_C::DIVPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) {
@ -2219,7 +2145,7 @@ void BX_CPU_C::DIVPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2266,7 +2192,7 @@ void BX_CPU_C::DIVSD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2306,7 +2232,7 @@ void BX_CPU_C::DIVSS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2346,7 +2272,7 @@ void BX_CPU_C::MAXPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  int rc;

@ -2405,7 +2331,7 @@ void BX_CPU_C::MAXPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  int rc;

@ -2455,7 +2381,7 @@ void BX_CPU_C::MAXSD_VsdWsd(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2496,7 +2422,7 @@ void BX_CPU_C::MAXSS_VssWss(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2537,7 +2463,7 @@ void BX_CPU_C::HADDPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2584,7 +2510,7 @@ void BX_CPU_C::HADDPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) {
@ -2638,7 +2564,7 @@ void BX_CPU_C::HSUBPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -2685,7 +2611,7 @@ void BX_CPU_C::HSUBPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) {
@ -2739,7 +2665,7 @@ void BX_CPU_C::CMPPS_VpsWpsIb(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status;
+  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  int ib = i->Ib();

@ -2813,7 +2739,7 @@ void BX_CPU_C::CMPPD_VpdWpdIb(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status;
+  float_status_t status;
  mxcsr_to_softfloat_status_word(status, MXCSR);
  int ib = i->Ib();

@ -2876,7 +2802,7 @@ void BX_CPU_C::CMPSD_VsdWsdIb(bxInstruction_c *i)
    read_virtual_qword(i->seg(), RMAddr(i), (Bit64u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  int ib = i->Ib();

@ -2932,7 +2858,7 @@ void BX_CPU_C::CMPSS_VssWssIb(bxInstruction_c *i)
    read_virtual_dword(i->seg(), RMAddr(i), (Bit32u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);
  int ib = i->Ib();

@ -2988,7 +2914,7 @@ void BX_CPU_C::ADDSUBPD_VpdWpd(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) 
@ -3035,7 +2961,7 @@ void BX_CPU_C::ADDSUBPS_VpsWps(bxInstruction_c *i)
    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op2);
  }

-  softfloat_status_word_t status_word;
+  float_status_t status_word;
  mxcsr_to_softfloat_status_word(status_word, MXCSR);

  if (MXCSR.get_DAZ()) {
--- a/bochs/cpu/sse_rcp.cc
+++ b/bochs/cpu/sse_rcp.cc
@ -24,8 +24,7 @@

 #if BX_SUPPORT_SSE

-#include "softfloat.h"
-#include "softfloat-specialize.h"
+#include "fpu/softfloat-specialize.h"

 BX_CPP_INLINE Float32 convert_to_QNaN(Float32 op)
 {
--- a/bochs/fpu/Makefile.in
+++ b/bochs/fpu/Makefile.in
@ -40,47 +40,26 @@ CXXFLAGS = @CXXFLAGS@ @GUI_CXXFLAGS@

 LDFLAGS    = @LDFLAGS@
 LIBS       = @LIBS@
-X_LIBS     = @X_LIBS@
-X_PRE_LIBS = @X_PRE_LIBS@
 RANLIB     = @RANLIB@

-#DEBUG	 = -DDEBUGGING
-PARANOID = -DPARANOID
-
 L_TARGET = libfpu.a


 BX_INCDIRS = -I.. -I$(srcdir)/.. -I../@INSTRUMENT_DIR@ -I$(srcdir)/../@INSTRUMENT_DIR@ -I. -I$(srcdir)/. -I./stubs -I$(srcdir)/./stubs

-FPU_FLAGS = -DUSE_WITH_CPU_SIM $(PARANOID) $(DEBUG) -DNO_ASSEMBLER
-FPU_GLUE_OBJ = wmFPUemu_glue.o fpu.o
-
-# From 'C' language sources:
-C_OBJS = fpu_entry.o errors.o reg_ld_str.o load_store.o \
-	fpu_arith.o fpu_aux.o fpu_etc.o fpu_tags.o fpu_trig.o \
-	poly_atan.o poly_l2.o poly_2xm1.o poly_sin.o poly_tan.o \
-	reg_add_sub.o reg_compare.o reg_constant.o reg_convert.o \
-	reg_divide.o reg_mul.o fpu_compare.o reg_u_add.o \
-        reg_u_div.o reg_u_mul.o reg_u_sub.o reg_norm.o reg_round.o \
-	wm_shrx.o wm_sqrt.o div_Xsig.o polynom_Xsig.o round_Xsig.o \
-	shr_Xsig.o mul_Xsig.o
-
-L_OBJS = $(C_OBJS)
-
-OBJS = \
-	@FPU_GLUE_OBJ@ \
-	$(L_OBJS)
-
-
-BX_INCLUDES = ../bochs.h ../config.h
+OBJS = ferr.o fpu.o fpu_arith.o fpu_compare.o fpu_const.o \
+               fpu_load_store.o fpu_misc.o fpu_trans.o fpu_tags.o \
+               fprem.o fsincos.o f2xm1.o fyl2x.o fpatan.o \
+               softfloat.o softfloatx80.o softfloat-specialize.o \
+               softfloat-round-pack.o poly.o

 all: libfpu.a

 .@CPP_SUFFIX@.o:
-	$(CXX) @DASH@c $(BX_INCDIRS) $(CXXFLAGS) $(FPU_FLAGS) @CXXFP@$< @OFP@$@
+	$(CXX) @DASH@c $(BX_INCDIRS) $(CXXFLAGS) @CXXFP@$< @OFP@$@

 .c.o:
-	$(CC) @DASH@c $(CFLAGS) $(FPU_FLAGS) $(BX_INCDIRS) $< @OFP@$@
+	$(CC) @DASH@c $(CFLAGS) $(BX_INCDIRS) $< @OFP@$@


 libfpu.a: $(OBJS)
@ -88,8 +67,6 @@ libfpu.a: $(OBJS)
 	@MAKELIB@ $(OBJS)
 	$(RANLIB) libfpu.a

-$(OBJS): $(BX_INCLUDES)
-
 clean:
 	@RMCOMMAND@ *.o
 	@RMCOMMAND@ *.a
@ -101,113 +78,160 @@ dist-clean: clean
 # dependencies generated by
 #  gcc -MM -I. -I.. -I../instrument/stubs -Istubs *.c  *.cc | sed 's/\.cc/.@CPP_SUFFIX@/g'
 ###########################################
-div_Xsig.o: div_Xsig.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h poly.h
-errors.o: errors.c stubs/linux/signal.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h exception.h \
-  status_w.h control_w.h reg_constant.h
-fpu_arith.o: fpu_arith.c fpu_system.h ../config.h ../cpu/i387.h fpu_emu.h \
-  stubs/linux/linkage.h fpu_proto.h control_w.h status_w.h
-fpu_aux.o: fpu_aux.c fpu_system.h ../config.h ../cpu/i387.h exception.h \
-  fpu_emu.h stubs/linux/linkage.h fpu_proto.h status_w.h control_w.h
-fpu_compare.o: fpu_compare.c fpu_system.h ../config.h ../cpu/i387.h \
-  exception.h fpu_emu.h stubs/linux/linkage.h fpu_proto.h status_w.h
-fpu_entry.o: fpu_entry.c fpu_system.h ../config.h ../cpu/i387.h fpu_emu.h \
-  stubs/linux/linkage.h fpu_proto.h exception.h control_w.h status_w.h \
-  stubs/linux/signal.h
-fpu_etc.o: fpu_etc.c fpu_system.h ../config.h ../cpu/i387.h exception.h \
-  fpu_emu.h stubs/linux/linkage.h fpu_proto.h status_w.h reg_constant.h
-fpu_tags.o: fpu_tags.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h exception.h
-fpu_trig.o: fpu_trig.c fpu_system.h ../config.h ../cpu/i387.h exception.h \
-  fpu_emu.h stubs/linux/linkage.h fpu_proto.h status_w.h control_w.h \
-  reg_constant.h
-load_store.o: load_store.c fpu_system.h ../config.h ../cpu/i387.h \
-  exception.h fpu_emu.h stubs/linux/linkage.h fpu_proto.h status_w.h \
-  control_w.h
-mul_Xsig.o: mul_Xsig.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h poly.h
-poly_2xm1.o: poly_2xm1.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h reg_constant.h \
-  control_w.h poly.h
-poly_atan.o: poly_atan.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h reg_constant.h \
-  status_w.h control_w.h poly.h
-poly_l2.o: poly_l2.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h reg_constant.h \
-  control_w.h poly.h
-poly_sin.o: poly_sin.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h reg_constant.h \
-  control_w.h poly.h
-poly_tan.o: poly_tan.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h reg_constant.h \
-  control_w.h poly.h
-polynom_Xsig.o: polynom_Xsig.c fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h poly.h
-reg_add_sub.o: reg_add_sub.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h reg_constant.h \
-  control_w.h
-reg_compare.o: reg_compare.c fpu_system.h ../config.h ../cpu/i387.h \
-  exception.h fpu_emu.h stubs/linux/linkage.h fpu_proto.h control_w.h \
-  status_w.h
-reg_constant.o: reg_constant.c fpu_system.h ../config.h ../cpu/i387.h \
-  fpu_emu.h stubs/linux/linkage.h fpu_proto.h status_w.h reg_constant.h \
-  control_w.h
-reg_convert.o: reg_convert.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h
-reg_divide.o: reg_divide.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h reg_constant.h
-reg_ld_str.o: reg_ld_str.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h exception.h reg_constant.h \
-  control_w.h status_w.h
-reg_mul.o: reg_mul.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h exception.h reg_constant.h
-reg_norm.o: reg_norm.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h
-reg_round.o: reg_round.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h exception.h control_w.h
-reg_u_add.o: reg_u_add.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h control_w.h
-reg_u_div.o: reg_u_div.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h control_w.h
-reg_u_mul.o: reg_u_mul.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h control_w.h
-reg_u_sub.o: reg_u_sub.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h control_w.h
-round_Xsig.o: round_Xsig.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h poly.h
-shr_Xsig.o: shr_Xsig.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h poly.h
-wm_shrx.o: wm_shrx.c fpu_emu.h stubs/linux/linkage.h fpu_system.h \
-  ../config.h ../cpu/i387.h fpu_proto.h
-wm_sqrt.o: wm_sqrt.c exception.h fpu_emu.h stubs/linux/linkage.h \
-  fpu_system.h ../config.h ../cpu/i387.h fpu_proto.h
-wmFPUemu_glue.o: wmFPUemu_glue.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
-  ../bx_debug/debug.h ../bxversion.h ../gui/siminterface.h \
-  ../state_file.h ../cpu/cpu.h ../cpu/lazy_flags.h ../cpu/hostasm.h \
-  ../cpu/apic.h ../cpu/i387.h ../cpu/xmm.h ../memory/memory.h \
-  ../pc_system.h ../plugin.h ../extplugin.h ../gui/gui.h \
-  ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h ../iodev/pci.h \
-  ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h ../iodev/ioapic.h \
-  ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h ../iodev/floppy.h \
-  ../iodev/harddrv.h ../iodev/cdrom.h ../iodev/vmware3.h \
-  ../iodev/keyboard.h ../iodev/parallel.h ../iodev/pic.h ../iodev/pit.h \
-  ../iodev/pit_wrap.h ../iodev/pit82c54.h ../iodev/virt_timer.h \
-  ../iodev/serial.h ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
+poly.o: poly.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-macros.h softfloat-round-pack.h
+f2xm1.o: f2xm1.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-macros.h softfloat-round-pack.h
+ferr.o: ferr.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
+  ../bxversion.h ../gui/siminterface.h ../state_file.h ../cpu/cpu.h \
+  ../cpu/lazy_flags.h ../cpu/hostasm.h ../cpu/apic.h ../cpu/i387.h \
+  ../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
+  ../cpu/xmm.h ../disasm/disasm.h ../memory/memory.h ../pc_system.h \
+  ../plugin.h ../extplugin.h ../gui/gui.h ../gui/textconfig.h \
+  ../gui/keymap.h ../iodev/iodev.h ../iodev/pci.h ../iodev/pci2isa.h \
+  ../iodev/pcivga.h ../iodev/vga.h ../iodev/ioapic.h ../iodev/biosdev.h \
+  ../iodev/cmos.h ../iodev/dma.h ../iodev/floppy.h ../iodev/harddrv.h \
+  ../iodev/cdrom.h ../iodev/vmware3.h ../iodev/keyboard.h \
+  ../iodev/parallel.h ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h \
+  ../iodev/pit82c54.h ../iodev/virt_timer.h ../iodev/serial.h \
+  ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
  ../iodev/guest2host.h ../iodev/slowdown_timer.h ../iodev/extfpuirq.h \
  ../iodev/gameport.h ../iodev/speaker.h ../instrument/stubs/instrument.h \
-  fpu_emu.h stubs/linux/linkage.h fpu_system.h fpu_proto.h \
-  stubs/linux/signal.h
+  softfloat-specialize.h
+fpatan.o: fpatan.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-macros.h softfloat-round-pack.h \
+  fpu_constant.h
+fprem.o: fprem.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-round-pack.h softfloat-macros.h
 fpu.o: fpu.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
  ../bxversion.h ../gui/siminterface.h ../state_file.h ../cpu/cpu.h \
-  ../cpu/lazy_flags.h ../cpu/i387.h ../cpu/xmm.h ../memory/memory.h \
-  ../pc_system.h ../plugin.h ../extplugin.h ../gui/gui.h \
-  ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h ../iodev/pci.h \
-  ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h ../iodev/ioapic.h \
-  ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h ../iodev/floppy.h \
-  ../iodev/harddrv.h ../iodev/cdrom.h ../iodev/vmware3.h \
-  ../iodev/keyboard.h ../iodev/parallel.h ../iodev/pic.h ../iodev/pit.h \
-  ../iodev/pit_wrap.h ../iodev/pit82c54.h ../iodev/virt_timer.h \
-  ../iodev/serial.h ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
+  ../cpu/lazy_flags.h ../cpu/hostasm.h ../cpu/apic.h ../cpu/i387.h \
+  ../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h ../fpu/control_w.h \
+  ../cpu/xmm.h ../disasm/disasm.h ../memory/memory.h ../pc_system.h \
+  ../plugin.h ../extplugin.h ../gui/gui.h ../gui/textconfig.h \
+  ../gui/keymap.h ../iodev/iodev.h ../iodev/pci.h ../iodev/pci2isa.h \
+  ../iodev/pcivga.h ../iodev/vga.h ../iodev/ioapic.h ../iodev/biosdev.h \
+  ../iodev/cmos.h ../iodev/dma.h ../iodev/floppy.h ../iodev/harddrv.h \
+  ../iodev/cdrom.h ../iodev/vmware3.h ../iodev/keyboard.h \
+  ../iodev/parallel.h ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h \
+  ../iodev/pit82c54.h ../iodev/virt_timer.h ../iodev/serial.h \
+  ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
  ../iodev/guest2host.h ../iodev/slowdown_timer.h ../iodev/extfpuirq.h \
-  ../instrument/stubs/instrument.h
+  ../iodev/gameport.h ../iodev/speaker.h ../instrument/stubs/instrument.h
+fpu_arith.o: fpu_arith.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
+  ../bx_debug/debug.h ../bxversion.h ../gui/siminterface.h \
+  ../state_file.h ../cpu/cpu.h ../cpu/lazy_flags.h ../cpu/hostasm.h \
+  ../cpu/apic.h ../cpu/i387.h ../fpu/softfloat.h ../fpu/tag_w.h \
+  ../fpu/status_w.h ../fpu/control_w.h ../cpu/xmm.h ../disasm/disasm.h \
+  ../memory/memory.h ../pc_system.h ../plugin.h ../extplugin.h \
+  ../gui/gui.h ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h \
+  ../iodev/pci.h ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h \
+  ../iodev/ioapic.h ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h \
+  ../iodev/floppy.h ../iodev/harddrv.h ../iodev/cdrom.h \
+  ../iodev/vmware3.h ../iodev/keyboard.h ../iodev/parallel.h \
+  ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h ../iodev/pit82c54.h \
+  ../iodev/virt_timer.h ../iodev/serial.h ../iodev/unmapped.h \
+  ../iodev/eth.h ../iodev/ne2k.h ../iodev/guest2host.h \
+  ../iodev/slowdown_timer.h ../iodev/extfpuirq.h ../iodev/gameport.h \
+  ../iodev/speaker.h ../instrument/stubs/instrument.h
+fpu_compare.o: fpu_compare.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
+  ../bx_debug/debug.h ../bxversion.h ../gui/siminterface.h \
+  ../state_file.h ../cpu/cpu.h ../cpu/lazy_flags.h ../cpu/hostasm.h \
+  ../cpu/apic.h ../cpu/i387.h ../fpu/softfloat.h ../fpu/tag_w.h \
+  ../fpu/status_w.h ../fpu/control_w.h ../cpu/xmm.h ../disasm/disasm.h \
+  ../memory/memory.h ../pc_system.h ../plugin.h ../extplugin.h \
+  ../gui/gui.h ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h \
+  ../iodev/pci.h ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h \
+  ../iodev/ioapic.h ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h \
+  ../iodev/floppy.h ../iodev/harddrv.h ../iodev/cdrom.h \
+  ../iodev/vmware3.h ../iodev/keyboard.h ../iodev/parallel.h \
+  ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h ../iodev/pit82c54.h \
+  ../iodev/virt_timer.h ../iodev/serial.h ../iodev/unmapped.h \
+  ../iodev/eth.h ../iodev/ne2k.h ../iodev/guest2host.h \
+  ../iodev/slowdown_timer.h ../iodev/extfpuirq.h ../iodev/gameport.h \
+  ../iodev/speaker.h ../instrument/stubs/instrument.h softfloatx80.h \
+  softfloat.h softfloat-specialize.h
+fpu_const.o: fpu_const.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
+  ../bx_debug/debug.h ../bxversion.h ../gui/siminterface.h \
+  ../state_file.h ../cpu/cpu.h ../cpu/lazy_flags.h ../cpu/hostasm.h \
+  ../cpu/apic.h ../cpu/i387.h ../fpu/softfloat.h ../fpu/tag_w.h \
+  ../fpu/status_w.h ../fpu/control_w.h ../cpu/xmm.h ../disasm/disasm.h \
+  ../memory/memory.h ../pc_system.h ../plugin.h ../extplugin.h \
+  ../gui/gui.h ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h \
+  ../iodev/pci.h ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h \
+  ../iodev/ioapic.h ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h \
+  ../iodev/floppy.h ../iodev/harddrv.h ../iodev/cdrom.h \
+  ../iodev/vmware3.h ../iodev/keyboard.h ../iodev/parallel.h \
+  ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h ../iodev/pit82c54.h \
+  ../iodev/virt_timer.h ../iodev/serial.h ../iodev/unmapped.h \
+  ../iodev/eth.h ../iodev/ne2k.h ../iodev/guest2host.h \
+  ../iodev/slowdown_timer.h ../iodev/extfpuirq.h ../iodev/gameport.h \
+  ../iodev/speaker.h ../instrument/stubs/instrument.h softfloatx80.h \
+  softfloat.h softfloat-specialize.h
+fpu_load_store.o: fpu_load_store.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
+  ../bx_debug/debug.h ../bxversion.h ../gui/siminterface.h \
+  ../state_file.h ../cpu/cpu.h ../cpu/lazy_flags.h ../cpu/hostasm.h \
+  ../cpu/apic.h ../cpu/i387.h ../fpu/softfloat.h ../fpu/tag_w.h \
+  ../fpu/status_w.h ../fpu/control_w.h ../cpu/xmm.h ../disasm/disasm.h \
+  ../memory/memory.h ../pc_system.h ../plugin.h ../extplugin.h \
+  ../gui/gui.h ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h \
+  ../iodev/pci.h ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h \
+  ../iodev/ioapic.h ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h \
+  ../iodev/floppy.h ../iodev/harddrv.h ../iodev/cdrom.h \
+  ../iodev/vmware3.h ../iodev/keyboard.h ../iodev/parallel.h \
+  ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h ../iodev/pit82c54.h \
+  ../iodev/virt_timer.h ../iodev/serial.h ../iodev/unmapped.h \
+  ../iodev/eth.h ../iodev/ne2k.h ../iodev/guest2host.h \
+  ../iodev/slowdown_timer.h ../iodev/extfpuirq.h ../iodev/gameport.h \
+  ../iodev/speaker.h ../instrument/stubs/instrument.h softfloatx80.h \
+  softfloat.h softfloat-specialize.h
+fpu_misc.o: fpu_misc.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
+  ../bx_debug/debug.h ../bxversion.h ../gui/siminterface.h \
+  ../state_file.h ../cpu/cpu.h ../cpu/lazy_flags.h ../cpu/hostasm.h \
+  ../cpu/apic.h ../cpu/i387.h ../fpu/softfloat.h ../fpu/tag_w.h \
+  ../fpu/status_w.h ../fpu/control_w.h ../cpu/xmm.h ../disasm/disasm.h \
+  ../memory/memory.h ../pc_system.h ../plugin.h ../extplugin.h \
+  ../gui/gui.h ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h \
+  ../iodev/pci.h ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h \
+  ../iodev/ioapic.h ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h \
+  ../iodev/floppy.h ../iodev/harddrv.h ../iodev/cdrom.h \
+  ../iodev/vmware3.h ../iodev/keyboard.h ../iodev/parallel.h \
+  ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h ../iodev/pit82c54.h \
+  ../iodev/virt_timer.h ../iodev/serial.h ../iodev/unmapped.h \
+  ../iodev/eth.h ../iodev/ne2k.h ../iodev/guest2host.h \
+  ../iodev/slowdown_timer.h ../iodev/extfpuirq.h ../iodev/gameport.h \
+  ../iodev/speaker.h ../instrument/stubs/instrument.h softfloatx80.h \
+  softfloat.h softfloat-specialize.h
+fpu_tags.o: fpu_tags.@CPP_SUFFIX@ softfloat.h ../config.h softfloat-specialize.h \
+  ../cpu/i387.h ../fpu/softfloat.h ../fpu/tag_w.h ../fpu/status_w.h \
+  ../fpu/control_w.h
+fpu_trans.o: fpu_trans.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h \
+  ../bx_debug/debug.h ../bxversion.h ../gui/siminterface.h \
+  ../state_file.h ../cpu/cpu.h ../cpu/lazy_flags.h ../cpu/hostasm.h \
+  ../cpu/apic.h ../cpu/i387.h ../fpu/softfloat.h ../fpu/tag_w.h \
+  ../fpu/status_w.h ../fpu/control_w.h ../cpu/xmm.h ../disasm/disasm.h \
+  ../memory/memory.h ../pc_system.h ../plugin.h ../extplugin.h \
+  ../gui/gui.h ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h \
+  ../iodev/pci.h ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h \
+  ../iodev/ioapic.h ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h \
+  ../iodev/floppy.h ../iodev/harddrv.h ../iodev/cdrom.h \
+  ../iodev/vmware3.h ../iodev/keyboard.h ../iodev/parallel.h \
+  ../iodev/pic.h ../iodev/pit.h ../iodev/pit_wrap.h ../iodev/pit82c54.h \
+  ../iodev/virt_timer.h ../iodev/serial.h ../iodev/unmapped.h \
+  ../iodev/eth.h ../iodev/ne2k.h ../iodev/guest2host.h \
+  ../iodev/slowdown_timer.h ../iodev/extfpuirq.h ../iodev/gameport.h \
+  ../iodev/speaker.h ../instrument/stubs/instrument.h softfloatx80.h \
+  softfloat.h softfloat-specialize.h
+fsincos.o: fsincos.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-macros.h softfloat-round-pack.h \
+  fpu_constant.h
+fyl2x.o: fyl2x.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-macros.h softfloat-round-pack.h
+softfloat-round-pack.o: softfloat-round-pack.@CPP_SUFFIX@ softfloat.h ../config.h \
+  softfloat-round-pack.h softfloat-macros.h softfloat-specialize.h
+softfloat-specialize.o: softfloat-specialize.@CPP_SUFFIX@ softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-macros.h
+softfloat.o: softfloat.@CPP_SUFFIX@ softfloat.h ../config.h softfloat-round-pack.h \
+  softfloat-macros.h softfloat-specialize.h
+softfloatx80.o: softfloatx80.@CPP_SUFFIX@ softfloatx80.h softfloat.h ../config.h \
+  softfloat-specialize.h softfloat-round-pack.h softfloat-macros.h
--- a/bochs/fpu/PORTING
+++ b/bochs/fpu/PORTING
@ -1,11 +0,0 @@
-Portable i387 emulator
-
-22 Nov 99  Version 2.02:
-
-First pseudo-portable version for little-endian machines.
-
-Beginning of big-endian support provided through the pre-processor
-symbol BIG_ENDIAN.
-
-Some of the code in fpu_entry.c and get_address.c is still in a
-non-portable form and these two files still contain assembler code.
--- a/bochs/fpu/README
+++ b/bochs/fpu/README
@ -1,427 +0,0 @@
- +---------------------------------------------------------------------------+
- |  wm-FPU-emu   an FPU emulator for 80386 and 80486SX microprocessors.      |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1995,1996,1997,1999                          |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- |    This program is free software; you can redistribute it and/or modify   |
- |    it under the terms of the GNU General Public License version 2 as      |
- |    published by the Free Software Foundation.                             |
- |                                                                           |
- |    This program is distributed in the hope that it will be useful,        |
- |    but WITHOUT ANY WARRANTY; without even the implied warranty of         |
- |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          |
- |    GNU General Public License for more details.                           |
- |                                                                           |
- |    You should have received a copy of the GNU General Public License      |
- |    along with this program; if not, write to the Free Software            |
- |    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.              |
- |                                                                           |
- +---------------------------------------------------------------------------+
-
-
-
-wm-FPU-emu is an FPU emulator for Linux. It is derived from wm-emu387
-which was my 80387 emulator for early versions of djgpp (gcc under
-msdos); wm-emu387 was in turn based upon emu387 which was written by
-DJ Delorie for djgpp.  The interface to the Linux kernel is based upon
-the original Linux math emulator by Linus Torvalds.
-
-My target FPU for wm-FPU-emu is that described in the Intel486
-Programmer's Reference Manual (1992 edition). Unfortunately, numerous
-facets of the functioning of the FPU are not well covered in the
-Reference Manual. The information in the manual has been supplemented
-with measurements on real 80486's. Unfortunately, it is simply not
-possible to be sure that all of the peculiarities of the 80486 have
-been discovered, so there is always likely to be obscure differences
-in the detailed behaviour of the emulator and a real 80486.
-
-wm-FPU-emu does not implement all of the behaviour of the 80486 FPU,
-but is very close.  See "Limitations" later in this file for a list of
-some differences.
-
-Please report bugs, etc to me at:
-       billm@melbpc.org.au
-or     b.metzenthen@medoto.unimelb.edu.au
-
-For more information on the emulator and on floating point topics, see
-my web pages, currently at  http://www.suburbia.net/~billm/
-
-
--Bill Metzenthen
-  December 1999
-
-
----------------------- Internals of wm-FPU-emu -----------------------
-
-Numeric algorithms:
-(1) Add, subtract, and multiply. Nothing remarkable in these.
-(2) Divide has been tuned to get reasonable performance. The algorithm
-    is not the obvious one which most people seem to use, but is designed
-    to take advantage of the characteristics of the 80386. I expect that
-    it has been invented many times before I discovered it, but I have not
-    seen it. It is based upon one of those ideas which one carries around
-    for years without ever bothering to check it out.
-(3) The sqrt function has been tuned to get good performance. It is based
-    upon Newton's classic method. Performance was improved by capitalizing
-    upon the properties of Newton's method, and the code is once again
-    structured taking account of the 80386 characteristics.
-(4) The trig, log, and exp functions are based in each case upon quasi-
-    "optimal" polynomial approximations. My definition of "optimal" was
-    based upon getting good accuracy with reasonable speed.
-(5) The argument reducing code for the trig function effectively uses
-    a value of pi which is accurate to more than 128 bits. As a consequence,
-    the reduced argument is accurate to more than 64 bits for arguments up
-    to a few pi, and accurate to more than 64 bits for most arguments,
-    even for arguments approaching 2^63. This is far superior to an
-    80486, which uses a value of pi which is accurate to 66 bits.
-
-The code of the emulator is complicated slightly by the need to
-account for a limited form of re-entrancy. Normally, the emulator will
-emulate each FPU instruction to completion without interruption.
-However, it may happen that when the emulator is accessing the user
-memory space, swapping may be needed. In this case the emulator may be
-temporarily suspended while disk i/o takes place. During this time
-another process may use the emulator, thereby perhaps changing static
-variables. The code which accesses user memory is confined to five
-files:
-    fpu_entry.c
-    reg_ld_str.c
-    load_store.c
-    get_address.c
-    errors.c
-As from version 1.12 of the emulator, no static variables are used
-(apart from those in the kernel's per-process tables). The emulator is
-therefore now fully re-entrant, rather than having just the restricted
-form of re-entrancy which is required by the Linux kernel.
-
----------------------- Limitations of wm-FPU-emu -----------------------
-
-There are a number of differences between the current wm-FPU-emu
-(version 2.05) and the 80486 FPU (apart from bugs).  The differences
-are fewer than those which applied to the 1.xx series of the emulator.
-Some of the more important differences are listed below:
-
-The Roundup flag does not have much meaning for the transcendental
-functions and its 80486 value with these functions is likely to differ
-from its emulator value.
-
-In a few rare cases the Underflow flag obtained with the emulator will
-be different from that obtained with an 80486. This occurs when the
-following conditions apply simultaneously:
-(a) the operands have a higher precision than the current setting of the
-    precision control (PC) flags.
-(b) the underflow exception is masked.
-(c) the magnitude of the exact result (before rounding) is less than 2^-16382.
-(d) the magnitude of the final result (after rounding) is exactly 2^-16382.
-(e) the magnitude of the exact result would be exactly 2^-16382 if the
-    operands were rounded to the current precision before the arithmetic
-    operation was performed.
-If all of these apply, the emulator will set the Underflow flag but a real
-80486 will not.
-
-NOTE: Certain formats of Extended Real are UNSUPPORTED. They are
-unsupported by the 80486. They are the Pseudo-NaNs, Pseudoinfinities,
-and Unnormals. None of these will be generated by an 80486 or by the
-emulator. Do not use them. The emulator treats them differently in
-detail from the way an 80486 does.
-
-Self modifying code can cause the emulator to fail. An example of such
-code is:
-          movl %esp,[%ebx]
-	  fld1
-The FPU instruction may be (usually will be) loaded into the pre-fetch
-queue of the CPU before the mov instruction is executed. If the
-destination of the 'movl' overlaps the FPU instruction then the bytes
-in the prefetch queue and memory will be inconsistent when the FPU
-instruction is executed. The emulator will be invoked but will not be
-able to find the instruction which caused the device-not-present
-exception. For this case, the emulator cannot emulate the behaviour of
-an 80486DX.
-
-Handling of the address size override prefix byte (0x67) has not been
-extensively tested yet. A major problem exists because using it in
-vm86 mode can cause a general protection fault. Address offsets
-greater than 0xffff appear to be illegal in vm86 mode but are quite
-acceptable (and work) in real mode. A small test program developed to
-check the addressing, and which runs successfully in real mode,
-crashes dosemu under Linux and also brings Windows down with a general
-protection fault message when run under the MS-DOS prompt of Windows
-3.1. (The program simply reads data from a valid address).
-
-The emulator supports 16-bit protected mode, with one difference from
-an 80486DX.  A 80486DX will allow some floating point instructions to
-write a few bytes below the lowest address of the stack.  The emulator
-will not allow this in 16-bit protected mode: no instructions are
-allowed to write outside the bounds set by the protection.
-
----------------------- Performance of wm-FPU-emu -----------------------
-
-Speed.
-----
-
-The speed of floating point computation with the emulator will depend
-upon instruction mix. Relative performance is best for the instructions
-which require most computation. The simple instructions are adversely
-affected by the FPU instruction trap overhead.
-
-
-Timing: Some simple timing tests have been made on the emulator functions.
-The times include load/store instructions. All times are in microseconds
-measured on a 33MHz 386 with 64k cache. The Turbo C tests were under
-ms-dos, the next two columns are for emulators running with the djgpp
-ms-dos extender. The final column is for wm-FPU-emu in Linux 0.97,
-using libm4.0 (hard).
-
-function      Turbo C        djgpp 1.06        WM-emu387     wm-FPU-emu
-
-   +          60.5           154.8              76.5          139.4
-   -          61.1-65.5      157.3-160.8        76.2-79.5     142.9-144.7
-   *          71.0           190.8              79.6          146.6
-   /          61.2-75.0      261.4-266.9        75.3-91.6     142.2-158.1
-
- sin()        310.8          4692.0            319.0          398.5
- cos()        284.4          4855.2            308.0          388.7
- tan()        495.0          8807.1            394.9          504.7
- atan()       328.9          4866.4            601.1          419.5-491.9
-
- sqrt()       128.7          crashed           145.2          227.0
- log()        413.1-419.1    5103.4-5354.21    254.7-282.2    409.4-437.1
- exp()        479.1          6619.2            469.1          850.8
-
-
-The performance under Linux is improved by the use of look-ahead code.
-The following results show the improvement which is obtained under
-Linux due to the look-ahead code. Also given are the times for the
-original Linux emulator with the 4.1 'soft' lib.
-
- [ Linus' note: I changed look-ahead to be the default under linux, as
-   there was no reason not to use it after I had edited it to be
-   disabled during tracing ]
-
-            wm-FPU-emu w     original w
-            look-ahead       'soft' lib
-   +         106.4             190.2
-   -         108.6-111.6      192.4-216.2
-   *         113.4             193.1
-   /         108.8-124.4      700.1-706.2
-
- sin()       390.5            2642.0
- cos()       381.5            2767.4
- tan()       496.5            3153.3
- atan()      367.2-435.5     2439.4-3396.8
-
- sqrt()      195.1            4732.5
- log()       358.0-387.5     3359.2-3390.3
- exp()       619.3            4046.4
-
-
-These figures are now somewhat out-of-date. The emulator has become
-progressively slower for most functions as more of the 80486 features
-have been implemented.
-
-
----------------------- Accuracy of wm-FPU-emu -----------------------
-
-
-The accuracy of the emulator is in almost all cases equal to or better
-than that of an Intel 80486 FPU.
-
-The results of the basic arithmetic functions (+,-,*,/), and fsqrt
-match those of an 80486 FPU. They are the best possible; the error for
-these never exceeds 1/2 an lsb. The fprem and fprem1 instructions
-return exact results; they have no error.
-
-
-The following table compares the emulator accuracy for the sqrt(),
-trig and log functions against the Turbo C "emulator". For this table,
-each function was tested at about 400 points. Ideal worst-case results
-would be 64 bits. The reduced Turbo C accuracy of cos() and tan() for
-arguments greater than pi/4 can be thought of as being related to the
-precision of the argument x; e.g. an argument of pi/2-(1e-10) which is
-accurate to 64 bits can result in a relative accuracy in cos() of
-about 64 + log2(cos(x)) = 31 bits.
-
-
-Function      Tested x range            Worst result                Turbo C
-                                        (relative bits)
-
-sqrt(x)       1 .. 2                    64.1                         63.2
-atan(x)       1e-10 .. 200              64.2                         62.8
-cos(x)        0 .. pi/2-(1e-10)         64.4 (x <= pi/4)             62.4
-                                        64.1 (x = pi/2-(1e-10))      31.9
-sin(x)        1e-10 .. pi/2             64.0                         62.8
-tan(x)        1e-10 .. pi/2-(1e-10)     64.0 (x <= pi/4)             62.1
-                                        64.1 (x = pi/2-(1e-10))      31.9
-exp(x)        0 .. 1                    63.1 **                      62.9
-log(x)        1+1e-6 .. 2               63.8 **                      62.1
-
-** The accuracy for exp() and log() is low because the FPU (emulator)
-does not compute them directly; two operations are required.
-
-
-The emulator passes the "paranoia" tests (compiled with gcc 2.3.3 or
-later) for 'float' variables (24 bit precision numbers) when precision
-control is set to 24, 53 or 64 bits, and for 'double' variables (53
-bit precision numbers) when precision control is set to 53 bits (a
-properly performing FPU cannot pass the 'paranoia' tests for 'double'
-variables when precision control is set to 64 bits).
-
-The code for reducing the argument for the trig functions (fsin, fcos,
-fptan and fsincos) has been improved and now effectively uses a value
-for pi which is accurate to more than 128 bits precision. As a
-consequence, the accuracy of these functions for large arguments has
-been dramatically improved (and is now very much better than an 80486
-FPU). There is also now no degradation of accuracy for fcos and fptan
-for operands close to pi/2. Measured results are (note that the
-definition of accuracy has changed slightly from that used for the
-above table):
-
-Function      Tested x range          Worst result
-                                     (absolute bits)
-
-cos(x)        0 .. 9.22e+18              62.0
-sin(x)        1e-16 .. 9.22e+18          62.1
-tan(x)        1e-16 .. 9.22e+18          61.8
-
-It is possible with some effort to find very large arguments which
-give much degraded precision. For example, the integer number
-           8227740058411162616.0
-is within about 10e-7 of a multiple of pi. To find the tan (for
-example) of this number to 64 bits precision it would be necessary to
-have a value of pi which had about 150 bits precision. The FPU
-emulator computes the result to about 42.6 bits precision (the correct
-result is about -9.739715e-8). On the other hand, an 80486 FPU returns
-0.01059, which in relative terms is hopelessly inaccurate.
-
-For arguments close to critical angles (which occur at multiples of
-pi/2) the emulator is more accurate than an 80486 FPU. For very large
-arguments, the emulator is far more accurate.
-
-
-Prior to version 1.20 of the emulator, the accuracy of the results for
-the transcendental functions (in their principal range) was not as
-good as the results from an 80486 FPU. From version 1.20, the accuracy
-has been considerably improved and these functions now give measured
-worst-case results which are better than the worst-case results given
-by an 80486 FPU.
-
-The following table gives the measured results for the emulator. The
-number of randomly selected arguments in each case is about half a
-million.  The group of three columns gives the frequency of the given
-accuracy in number of times per million, thus the second of these
-columns shows that an accuracy of between 63.80 and 63.89 bits was
-found at a rate of 133 times per one million measurements for fsin.
-The results show that the fsin, fcos and fptan instructions return
-results which are in error (i.e. less accurate than the best possible
-result (which is 64 bits)) for about one per cent of all arguments
-between -pi/2 and +pi/2.  The other instructions have a lower
-frequency of results which are in error.  The last two columns give
-the worst accuracy which was found (in bits) and the approximate value
-of the argument which produced it.
-
-                                frequency (per M)
-                               -------------------   ---------------
-instr   arg range    # tests   63.7   63.8    63.9   worst   at arg
-                               bits   bits    bits    bits
-----  ------------  -------   ----   ----   -----   -----  --------
-fsin     (0,pi/2)     547756      0    133   10673   63.89  0.451317
-fcos     (0,pi/2)     547563      0    126   10532   63.85  0.700801
-fptan    (0,pi/2)     536274     11    267   10059   63.74  0.784876
-fpatan  4 quadrants   517087      0      8    1855   63.88  0.435121 (4q)
-fyl2x     (0,20)      541861      0      0    1323   63.94  1.40923  (x)
-fyl2xp1 (-.293,.414)  520256      0      0    5678   63.93  0.408542 (x)
-f2xm1     (-1,1)      538847      4    481    6488   63.79  0.167709
-
-
-Tests performed on an 80486 FPU showed results of lower accuracy. The
-following table gives the results which were obtained with an AMD
-486DX2/66 (other tests indicate that an Intel 486DX produces
-identical results).  The tests were basically the same as those used
-to measure the emulator (the values, being random, were in general not
-the same).  The total number of tests for each instruction are given
-at the end of the table, in case each about 100k tests were performed.
-Another line of figures at the end of the table shows that most of the
-instructions return results which are in error for more than 10
-percent of the arguments tested.
-
-The numbers in the body of the table give the approx number of times a
-result of the given accuracy in bits (given in the left-most column)
-was obtained per one million arguments. For three of the instructions,
-two columns of results are given: * The second column for f2xm1 gives
-the number cases where the results of the first column were for a
-positive argument, this shows that this instruction gives better
-results for positive arguments than it does for negative.  * In the
-cases of fcos and fptan, the first column gives the results when all
-cases where arguments greater than 1.5 were removed from the results
-given in the second column. Unlike the emulator, an 80486 FPU returns
-results of relatively poor accuracy for these instructions when the
-argument approaches pi/2. The table does not show those cases when the
-accuracy of the results were less than 62 bits, which occurs quite
-often for fsin and fptan when the argument approaches pi/2. This poor
-accuracy is discussed above in relation to the Turbo C "emulator", and
-the accuracy of the value of pi.
-
-
-bits   f2xm1  f2xm1 fpatan   fcos   fcos  fyl2x fyl2xp1  fsin  fptan  fptan
-62.0       0      0      0      0    437      0      0      0      0    925
-62.1       0      0     10      0    894      0      0      0      0   1023
-62.2      14      0      0      0   1033      0      0      0      0    945
-62.3      57      0      0      0   1202      0      0      0      0   1023
-62.4     385      0      0     10   1292      0     23      0      0   1178
-62.5    1140      0      0    119   1649      0     39      0      0   1149
-62.6    2037      0      0    189   1620      0     16      0      0   1169
-62.7    5086     14      0    646   2315     10    101     35     39   1402
-62.8    8818     86      0    984   3050     59    287    131    224   2036
-62.9   11340   1355      0   2126   4153     79    605    357    321   1948
-63.0   15557   4750      0   3319   5376    246   1281    862    808   2688
-63.1   20016   8288      0   4620   6628    511   2569   1723   1510   3302
-63.2   24945  11127     10   6588   8098   1120   4470   2968   2990   4724
-63.3   25686  12382     69   8774  10682   1906   6775   4482   5474   7236
-63.4   29219  14722     79  11109  12311   3094   9414   7259   8912  10587
-63.5   30458  14936    393  13802  15014   5874  12666   9609  13762  15262
-63.6   32439  16448   1277  17945  19028  10226  15537  14657  19158  20346
-63.7   35031  16805   4067  23003  23947  18910  20116  21333  25001  26209
-63.8   33251  15820   7673  24781  25675  24617  25354  24440  29433  30329
-63.9   33293  16833  18529  28318  29233  31267  31470  27748  29676  30601
-
-Per cent with error:
-        30.9           3.2          18.5    9.8   13.1   11.6          17.4
-Total arguments tested:
-       70194  70099 101784 100641 100641 101799 128853 114893 102675 102675
-
-
------------------------- Contributors -------------------------------
-
-A number of people have contributed to the development of the
-emulator, often by just reporting bugs, sometimes with suggested
-fixes, and a few kind people have provided me with access in one way
-or another to an 80486 machine. Contributors include (to those people
-who I may have forgotten, please forgive me):
-
-Linus Torvalds
-Tommy.Thorn@daimi.aau.dk
-Andrew.Tridgell@anu.edu.au
-Nick Holloway, alfie@dcs.warwick.ac.uk
-Hermano Moura, moura@dcs.gla.ac.uk
-Jon Jagger, J.Jagger@scp.ac.uk
-Lennart Benschop
-Brian Gallew, geek+@CMU.EDU
-Thomas Staniszewski, ts3v+@andrew.cmu.edu
-Martin Howell, mph@plasma.apana.org.au
-M Saggaf, alsaggaf@athena.mit.edu
-Peter Barker, PETER@socpsy.sci.fau.edu
-tom@vlsivie.tuwien.ac.at
-Dan Russel, russed@rpi.edu
-Daniel Carosone, danielce@ee.mu.oz.au
-cae@jpmorgan.com
-Hamish Coleman, t933093@minyos.xx.rmit.oz.au
-Bruce Evans, bde@kralizec.zeta.org.au
-Timo Korvola, Timo.Korvola@hut.fi
-Rick Lyons, rick@razorback.brisnet.org.au
-Rick, jrs@world.std.com
- 
-...and numerous others who responded to my request for help with
-a real 80486.
-
--- a/bochs/fpu/control_w.h
+++ b/bochs/fpu/control_w.h
@ -1,40 +1,52 @@
-/*---------------------------------------------------------------------------+
- |  control_w.h                                                              |
- |  $Id: control_w.h,v 1.4 2003-07-31 17:39:24 sshwarts Exp $
- |                                                                           |
- | Copyright (C) 1992,1993                                                   |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
- |                                                                           |
- +---------------------------------------------------------------------------*/
+/////////////////////////////////////////////////////////////////////////
+//
+//   Copyright (c) 2004 Stanislav Shwartsman
+//          Written by Stanislav Shwartsman <gate at fidonet.org.il>
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//

-#ifndef _CONTROLW_H_
-#define _CONTROLW_H_
+#ifndef _CONTROL_W_H_
+#define _CONTROL_W_H_

-#define CW_RC		(0x0C00)	/* rounding control */
-#define CW_PC		(0x0300)	/* precision control */
+/* ************ */
+/* Control Word */
+/* ************ */

-#define CW_Precision	(0x0020)	/* loss of precision mask */
-#define CW_Underflow	(0x0010)	/* underflow mask */
-#define CW_Overflow	(0x0008)	/* overflow mask */
-#define CW_ZeroDiv	(0x0004)	/* divide by zero mask */
-#define CW_Denormal	(0x0002)	/* denormalized operand mask */
-#define CW_Invalid	(0x0001)	/* invalid operation mask */
+#define FPU_CW_RC		(0x0C00)  /* rounding control */
+#define FPU_CW_PC		(0x0300)  /* precision control */

-#define CW_Exceptions  	(0x003f)	/* all masks */
+#define FPU_RC_RND		(0x0000)  /* rounding control */
+#define FPU_RC_DOWN		(0x0400)
+#define FPU_RC_UP		(0x0800)
+#define FPU_RC_CHOP		(0x0C00)

-#define RC_RND		(0x0000)
-#define RC_DOWN		(0x0400)
-#define RC_UP		(0x0800)
-#define RC_CHOP		(0x0C00)
+#define FPU_CW_Precision	(0x0020)  /* loss of precision mask */
+#define FPU_CW_Underflow	(0x0010)  /* underflow mask */
+#define FPU_CW_Overflow		(0x0008)  /* overflow mask */
+#define FPU_CW_Zero_Div		(0x0004)  /* divide by zero mask */
+#define FPU_CW_Denormal		(0x0002)  /* denormalized operand mask */
+#define FPU_CW_Invalid		(0x0001)  /* invalid operation mask */

-/* p 15-5: Precision control bits affect only the following:
+#define FPU_CW_Exceptions_Mask 	(0x003f)  /* all masks */
+
+/* Precision control bits affect only the following:
   ADD, SUB(R), MUL, DIV(R), and SQRT */
-#define PR_24_BITS        (0x000)
-#define PR_53_BITS        (0x200)
-#define PR_64_BITS        (0x300)
-#define PR_RESERVED_BITS  (0x100)
-/* FULL_PRECISION simulates all exceptions masked */
-#define FULL_PRECISION  (PR_64_BITS | RC_RND | 0x3f)
+#define FPU_PR_32_BITS          (0x000)
+#define FPU_PR_RESERVED_BITS    (0x100)
+#define FPU_PR_64_BITS          (0x200)
+#define FPU_PR_80_BITS          (0x300)

-#endif /* _CONTROLW_H_ */
+#endif
--- a/bochs/fpu/div_Xsig.c
+++ b/bochs/fpu/div_Xsig.c
@ -1,186 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  div_Xsig.S                                                               |
- |  $Id: div_Xsig.c,v 1.3 2003-10-04 12:32:55 sshwarts Exp $
- |                                                                           |
- | Division subroutine for 96 bit quantities                                 |
- |                                                                           |
- | Copyright (C) 1994,1995,1999                                              |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | Divide the 96 bit quantity pointed to by a, by that pointed to by b, and  |
- | put the 96 bit result at the location d.                                  |
- |                                                                           |
- | The result may not be accurate to 96 bits. It is intended for use where   |
- | a result better than 64 bits is required. The result should usually be    |
- | good to at least 94 bits.                                                 |
- | The returned result is actually divided by one half. This is done to      |
- | prevent overflow.                                                         |
- |                                                                           |
- |  .aaaaaaaaaaaaaa / .bbbbbbbbbbbbb  ->  .dddddddddddd                      |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "fpu_emu.h"
-#include "poly.h"
-
-
-void div_Xsig(const Xsig *aa, const Xsig *b, Xsig *dest)
-{
-  Xsig a = *aa, xpr, result;
-  u32  prodh, prodl, den, wd;
-  u64  num, prod;
-
-#ifdef PARANOID
-  if ( (b->msw & 0x80000000) == 0 )
-    {
-      INTERNAL(0x240);
-      return;
-    }
-#endif
-
-  /* Shift a right */
-  a.lsw >>= 1;
-  if ( a.midw & 1 )
-    a.lsw |= 0x80000000;
-  a.midw >>= 1;
-  if ( a.msw & 1 )
-    a.midw |= 0x80000000;
-  a.msw >>= 1;
-
-  num = a.msw;
-  num <<= 32;
-  num |= a.midw;
-
-  den = b->msw + 1;
-  if ( den )
-    {
-      result.msw = num / den;
-    }
-  else
-    result.msw = a.msw;
-
-  xpr = *b;
-  mul32_Xsig(&xpr, result.msw);
-  a.msw -= xpr.msw;
-  wd = a.midw;
-  a.midw -= xpr.midw;
-  if ( a.midw > wd )
-    a.msw --;
-  wd = a.lsw;
-  a.lsw -= xpr.lsw;
-  if ( a.lsw > wd )
-    {
-      a.midw --;
-      if ( a.midw == 0xffffffff )
-	a.msw --;
-    }
-
-#ifdef PARANOID
-      if ( a.msw > 1 )
-	{
-	  INTERNAL(0x241);
-	}
-#endif
-
-  while ( (a.msw != 0) || (a.midw > b->msw) )
-    {
-      wd = a.midw;
-      a.midw -= b->msw;
-      if ( a.midw > wd )
-	a.msw --;
-      wd = a.lsw;
-      a.lsw -= b->midw;
-      if ( a.lsw > wd )
-	{
-	  a.midw --;
-	  if ( a.midw == 0xffffffff )
-	    a.msw --;
-	}
-      result.msw ++;
-    }
-
-  /* Whew! result.msw is now done. */
-
-  num = a.midw;
-  num <<= 32;
-  num |= a.lsw;
-
-  if ( den )
-    {
-      result.midw = num / den;
-    }
-  else
-    result.midw = a.midw;
-
-  prod = result.midw;
-  prod *= b->msw;
-  a.midw -= prod >> 32;
-  prodl = prod;
-  wd = a.lsw;
-  a.lsw -= prodl;
-  if ( a.lsw > wd )
-    a.midw --;
-
-  prod = result.midw;
-  prod *= b->midw;
-  prodh = prod >> 32;
-  wd = a.lsw;
-  a.lsw -= prodh;
-  if ( a.lsw > wd )
-    a.midw --;
-
-#ifdef PARANOID
-      if ( a.midw > 1 )
-	{
-          INTERNAL(0x242);
-	}
-#endif
-
-  while ( (a.midw != 0) || (a.lsw > b->msw) )
-    {
-      wd = a.lsw;
-      a.lsw -= b->msw;
-      if ( a.lsw > wd )
-	a.midw --;
-      result.midw ++;
-    }
-
-
-  /* Now result.msw is done, the lsw is next... */
-
-  num = a.lsw;
-  num <<= 32;
-
-  if ( den )
-    {
-      result.lsw = num / den;
-    }
-  else
-    result.lsw = a.lsw;
-
-  prod = result.lsw;
-  prod *= b->msw;
-  a.lsw -= prod >> 32;
-
-#ifdef PARANOID
-  if ( a.lsw > 2 )
-    {
-      INTERNAL(0x243);
-    }
-#endif
-
-  result.lsw -= a.lsw;
-
-  /* Hey! we're done. */
-
-  *dest = result;
-
-}
-
--- a/bochs/fpu/errors.c
+++ b/bochs/fpu/errors.c
@ -1,553 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  errors.c                                                                 |
- |  $Id: errors.c,v 1.18 2004-03-06 13:33:11 sshwarts Exp $
- |                                                                           |
- |  The error handling functions for wm-FPU-emu                              |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1996                                         |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@jacobi.maths.monash.edu.au                |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | Note:                                                                     |
- |    The file contains code which accesses user memory.                     |
- |    Emulator static data may change when user memory is accessed, due to   |
- |    other processes using the emulator while swapping is in progress.      |
- +---------------------------------------------------------------------------*/
-
-#include <linux/signal.h>
-#include <stdio.h>
-
-#include "fpu_emu.h"
-#include "fpu_system.h"
-#include "exception.h"
-#include "status_w.h"
-#include "control_w.h"
-#include "reg_constant.h"
-
-#ifdef __cplusplus
-extern "C"
-#endif
-int printk(const char * fmt, ...);
-
-/*
-   Called for opcodes which are illegal and which are known to result in a
-   SIGILL with a real 80486.
-   */
-void FPU_illegal(void)
-{
-  math_abort(NULL, SIGILL);
-}
-
-static struct {
-  int type;
-  const char *name;
-} exception_names[] = {
-  { EX_StackOver, "stack overflow" },
-  { EX_StackUnder, "stack underflow" },
-  { EX_Precision, "loss of precision" },
-  { EX_Underflow, "underflow" },
-  { EX_Overflow, "overflow" },
-  { EX_ZeroDiv, "divide by zero" },
-  { EX_Denormal, "denormalized operand" },
-  { EX_Invalid, "invalid operation" },
-  { 0, NULL }
-};
-
-/*
- EX_INTERNAL is always given with a code which indicates where the
- error was detected.
-
- Internal error types:
-       0x14   in fpu_etc.c
-       0x1nn  in a *.c file:
-              0x101  in reg_add_sub.c
-              0x102  in reg_mul.c
-              0x104  in poly_atan.c
-              0x105  in reg_mul.c
-              0x107  in fpu_trig.c
-	      0x108  in reg_compare.c
-	      0x109  in reg_compare.c
-	      0x110  in reg_add_sub.c
-	      0x111  in fpe_entry.c
-	      0x112  in fpu_trig.c
-	      0x113  in errors.c
-	      0x115  in fpu_trig.c
-	      0x116  in fpu_trig.c
-	      0x117  in fpu_trig.c
-	      0x118  in fpu_trig.c
-	      0x119  in fpu_trig.c
-	      0x120  in poly_atan.c
-	      0x121  in reg_compare.c
-	      0x122  in reg_compare.c
-	      0x123  in reg_compare.c
-	      0x125  in fpu_trig.c
-	      0x126  in fpu_entry.c
-	      0x127  in poly_2xm1.c
-	      0x128  in fpu_entry.c
-	      0x129  in fpu_entry.c
-	      0x130  in get_address.c
-	      0x131  in get_address.c
-	      0x132  in get_address.c
-	      0x133  in get_address.c
-	      0x140  in load_store.c
-	      0x141  in load_store.c
-              0x150  in poly_sin.c
-              0x151  in poly_sin.c
-	      0x160  in reg_ld_str.c
-	      0x161  in reg_ld_str.c
-	      0x162  in reg_ld_str.c
-	      0x163  in reg_ld_str.c
-	      0x164  in reg_ld_str.c
-	      0x170  in fpu_tags.c
-	      0x171  in fpu_tags.c
-	      0x172  in fpu_tags.c
-	      0x180  in reg_convert.c
-*/
-
-void FPU_internal(int type)
-{
-   printk("FPU emulator: Internal error type 0x%04x\n", type);
-}
-
-void FPU_exception(int exception)
-{
-  /* Extract only the bits which we use to set the status word */
-  exception &= (SW_Exc_Mask);
-  /* Set the corresponding exception bit */
-  FPU_partial_status |= exception;
-  /* Set summary bits iff exception isn't masked */
-  if (FPU_partial_status & ~FPU_control_word & CW_Exceptions)
-	FPU_partial_status |= (SW_Summary | SW_Backward);
-  if (exception & (SW_Stack_Fault | EX_Precision))
-  {
-      if (!(exception & SW_C1))
-        /* This bit distinguishes over- from underflow for a stack fault,
-             and roundup from round-down for precision loss. */
-        FPU_partial_status &= ~SW_C1;
-  }
-}
-
-/* Real operation attempted on a NaN. */
-/* Returns < 0 if the exception is unmasked */
-int real_1op_NaN(FPU_REG *a)
-{
-  int signalling, isNaN;
-
-  isNaN = (exponent(a) == EXP_OVER) && (a->sigh & 0x80000000);
-
-  /* The default result for the case of two "equal" NaNs (signs may
-     differ) is chosen to reproduce 80486 behaviour */
-  signalling = isNaN && !(a->sigh & 0x40000000);
-
-  if (!signalling)
-    {
-      if (!isNaN)  /* pseudo-NaN, or other unsupported? */
-	{
-	  if (FPU_control_word & CW_Invalid)
-	    {
-	      /* Masked response */
-	      reg_copy(&CONST_QNaN, a);
-	    }
-	  EXCEPTION(EX_Invalid);
-	  return (!(FPU_control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
-	}
-      return TAG_Special;
-    }
-
-  if (FPU_control_word & CW_Invalid)
-    {
-      /* The masked response */
-      if (!(a->sigh & 0x80000000))  /* pseudo-NaN ? */
-	{
-	  reg_copy(&CONST_QNaN, a);
-	}
-      /* ensure a Quiet NaN */
-      a->sigh |= 0x40000000;
-    }
-
-  EXCEPTION(EX_Invalid);
-
-  return (!(FPU_control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
-}
-
-
-/* Real operation attempted on two operands, one a NaN. */
-/* Returns < 0 if the exception is unmasked */
-int real_2op_NaN(FPU_REG const *b, u_char tagb,
-		 int deststnr,
-		 FPU_REG const *defaultNaN)
-{
-  FPU_REG *dest = &st(deststnr);
-  FPU_REG const *a = dest;
-  u_char taga = FPU_gettagi(deststnr);
-  FPU_REG const *x;
-  int signalling, unsupported;
-
-  if (taga == TAG_Special)
-    taga = FPU_Special(a);
-  if (tagb == TAG_Special)
-    tagb = FPU_Special(b);
-
-  /* TW_NaN is also used for unsupported data types. */
-  unsupported = ((taga == TW_NaN)
-		 && !((exponent(a) == EXP_OVER) && (a->sigh & 0x80000000)))
-    || ((tagb == TW_NaN)
-	&& !((exponent(b) == EXP_OVER) && (b->sigh & 0x80000000)));
-  if (unsupported)
-    {
-      if (FPU_control_word & CW_Invalid)
-	{
-	  /* Masked response */
-	  FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
-	}
-      EXCEPTION(EX_Invalid);
-      return (!(FPU_control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
-    }
-
-  if (taga == TW_NaN)
-    {
-      x = a;
-      if (tagb == TW_NaN)
-	{
-	  signalling = !(a->sigh & b->sigh & 0x40000000);
-	  if (significand(b) > significand(a))
-	    x = b;
-	  else if (significand(b) == significand(a))
-	    {
-	      /* The default result for the case of two "equal" NaNs (signs may
-		 differ) is chosen to reproduce 80486 behaviour */
-	      x = defaultNaN;
-	    }
-	}
-      else
-	{
-	  /* return the quiet version of the NaN in a */
-	  signalling = !(a->sigh & 0x40000000);
-	}
-    }
-  else
-#ifdef PARANOID
-    if (tagb == TW_NaN)
-#endif /* PARANOID */
-    {
-      signalling = !(b->sigh & 0x40000000);
-      x = b;
-    }
-#ifdef PARANOID
-  else
-    {
-      signalling = 0;
-      INTERNAL(0x113);
-      x = &CONST_QNaN;
-    }
-#endif /* PARANOID */
-
-  if ((!signalling) || (FPU_control_word & CW_Invalid))
-    {
-      if (! x)
-	x = b;
-
-      if (!(x->sigh & 0x80000000))  /* pseudo-NaN ? */
-	x = &CONST_QNaN;
-
-      FPU_copy_to_regi(x, TAG_Special, deststnr);
-
-      if (!signalling)
-	return TAG_Special;
-
-      /* ensure a Quiet NaN */
-      dest->sigh |= 0x40000000;
-    }
-
-  EXCEPTION(EX_Invalid);
-
-  return (!(FPU_control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Special;
-}
-
-
-/* Invalid arith operation on Valid registers */
-/* Returns < 0 if the exception is unmasked */
-asmlinkage int arith_invalid(int deststnr)
-{
-  EXCEPTION(EX_Invalid);
-  
-  if (FPU_control_word & CW_Invalid)
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, deststnr);
-    }
-  
-  return (!(FPU_control_word & CW_Invalid) ? FPU_Exception : 0) | TAG_Valid;
-}
-
-
-/* Divide a finite number by zero */
-asmlinkage int FPU_divide_by_zero(int deststnr, u_char sign)
-{
-  FPU_REG *dest = &st(deststnr);
-  int tag = TAG_Valid;
-
-  if (FPU_control_word & CW_ZeroDiv)
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_INF, TAG_Special, deststnr);
-      setsign(dest, sign);
-      tag = TAG_Special;
-    }
- 
-  EXCEPTION(EX_ZeroDiv);
-
-  return (!(FPU_control_word & CW_ZeroDiv) ? FPU_Exception : 0) | tag;
-}
-
-
-/* This may be called often, so keep it lean */
-int set_precision_flag(int flags)
-{
-  if (FPU_control_word & CW_Precision)
-    {
-      FPU_partial_status &= ~(SW_C1 & flags);
-      FPU_partial_status |= flags;   /* The masked response */
-      return 0;
-    }
-  else
-    {
-      EXCEPTION(flags);
-      return 1;
-    }
-}
-
-
-/* This may be called often, so keep it lean */
-asmlinkage void set_precision_flag_up(void)
-{
-  if (FPU_control_word & CW_Precision)
-    FPU_partial_status |= (SW_Precision | SW_C1);   /* The masked response */
-  else
-    EXCEPTION(EX_Precision | SW_C1);
-}
-
-
-/* This may be called often, so keep it lean */
-asmlinkage void set_precision_flag_down(void)
-{
-  if (FPU_control_word & CW_Precision)
-    {   /* The masked response */
-      FPU_partial_status &= ~SW_C1;
-      FPU_partial_status |= SW_Precision;
-    }
-  else
-    EXCEPTION(EX_Precision);
-}
-
-
-asmlinkage int denormal_operand(void)
-{
-  if (FPU_control_word & CW_Denormal)
-    {   /* The masked response */
-      FPU_partial_status |= SW_Denorm_Op;
-      return TAG_Special;
-    }
-  else
-    {
-      EXCEPTION(EX_Denormal);
-      return TAG_Special | FPU_Exception;
-    }
-}
-
-
-asmlinkage int arith_overflow(FPU_REG *dest)
-{
-  int tag = TAG_Valid;
-
-  if (FPU_control_word & CW_Overflow)
-    {
-      /* The masked response */
-      reg_copy(&CONST_INF, dest);
-      tag = TAG_Special;
-    }
-  else
-    {
-      /* Subtract the magic number from the exponent */
-      addexponent(dest, (-3 * (1 << 13)));
-    }
-
-  EXCEPTION(EX_Overflow);
-  if (FPU_control_word & CW_Overflow)
-    {
-      /* The overflow exception is masked. */
-      /* By definition, precision is lost.
-	 The roundup bit (C1) is also set because we have
-	 "rounded" upwards to Infinity. */
-      EXCEPTION(EX_Precision | SW_C1);
-      return tag;
-    }
-
-  return tag;
-}
-
-
-asmlinkage int arith_round_overflow(FPU_REG *dest, u8 sign)
-{
-  int tag = TAG_Valid;
-  int largest;
-
-  if (FPU_control_word & CW_Overflow)
-    {
-      /* The masked response */
-      /* The response here depends upon the rounding mode */
-      switch (FPU_control_word & CW_RC)
-	{
-	case RC_CHOP:		/* Truncate */
-	  largest = 1;
-	  break;
-	case RC_UP:		/* Towards +infinity */
-	  largest = (sign == SIGN_NEG);
-	  break;
-	case RC_DOWN:		/* Towards -infinity */
-	  largest = (sign == SIGN_POS);
-	  break;
-	default:
-	  largest = 0;
-	  break;
-	}
-      if (! largest)
-	{
-	  reg_copy(&CONST_INF, dest);
-	  tag = TAG_Special;
-	}
-      else
-	{
-	  dest->exp = EXTENDED_Ebias+EXP_OVER-1;
-	  switch (FPU_control_word & CW_PC)
-	    {
-	    case 01:
-	    case PR_64_BITS:
-	      significand(dest) = BX_CONST64(0xffffffffffffffff);
-	      break;
-	    case PR_53_BITS:
-	      significand(dest) = BX_CONST64(0xfffffffffffff800);
-	      break;
-	    case PR_24_BITS:
-	      significand(dest) = BX_CONST64(0xffffff0000000000);
-	      break;
-	    }
-	}
-    }
-  else
-    {
-      /* Subtract the magic number from the exponent */
-      addexponent(dest, (-3 * (1 << 13)));
-      largest = 0;
-    }
-
-  EXCEPTION(EX_Overflow);
-  if (FPU_control_word & CW_Overflow)
-    {
-      /* The overflow exception is masked. */
-      if (largest)
-	{
-	  EXCEPTION(EX_Precision);
-	}
-      else
-	{
-	  /* By definition, precision is lost.
-	     The roundup bit (C1) is also set because we have
-	     "rounded" upwards to Infinity. */
-	  EXCEPTION(EX_Precision | SW_C1);
-	}
-      return tag;
-    }
-
-  return tag;
-}
-
-
-asmlinkage int arith_underflow(FPU_REG *dest)
-{
-  int tag = TAG_Valid;
-
-  if (FPU_control_word & CW_Underflow)
-    {
-      /* The masked response */
-      if (exponent16(dest) <= EXP_UNDER - 63)
-	{
-	  reg_copy(&CONST_Z, dest);
-	  FPU_partial_status &= ~SW_C1;       /* Round down. */
-	  tag = TAG_Zero;
-	}
-      else
-	{
-	  stdexp(dest);
-	}
-    }
-  else
-    {
-      /* Add the magic number to the exponent. */
-      addexponent(dest, (3 * (1 << 13)) + EXTENDED_Ebias);
-    }
-
-  EXCEPTION(EX_Underflow);
-  if (FPU_control_word & CW_Underflow)
-    {
-      /* The underflow exception is masked. */
-      EXCEPTION(EX_Precision);
-      return tag;
-    }
-
-  return tag;
-}
-
-
-void FPU_stack_overflow(void)
-{
- if (FPU_control_word & CW_Invalid)
-    {
-      /* The masked response */
-      FPU_tos--;
-      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-    }
-
-  EXCEPTION(EX_StackOver);
-}
-
-
-void FPU_stack_underflow(void)
-{
- if (FPU_control_word & CW_Invalid)
-    {
-      /* The masked response */
-      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-    }
-
-  EXCEPTION(EX_StackUnder);
-}
-
-
-void FPU_stack_underflow_i(int i)
-{
- if (FPU_control_word & CW_Invalid)
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
-    }
-
-  EXCEPTION(EX_StackUnder);
-}
-
-
-void FPU_stack_underflow_pop(int i)
-{
- if (FPU_control_word & CW_Invalid)
-    {
-      /* The masked response */
-      FPU_copy_to_regi(&CONST_QNaN, TAG_Special, i);
-      FPU_pop();
-    }
-
-  EXCEPTION(EX_StackUnder);
-}
--- a/bochs/fpu/f2xm1.cc
+++ b/bochs/fpu/f2xm1.cc
@ -0,0 +1,168 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+
+static const floatx80 floatx80_negone  = packFloatx80(1, 0x3fff, BX_CONST64(0x8000000000000000));
+static const floatx80 floatx80_neghalf = packFloatx80(1, 0x3ffe, BX_CONST64(0x8000000000000000));
+static const float128 float128_ln2     = 
+    packFloat128(BX_CONST64(0x3ffe62e42fefa39e), BX_CONST64(0xf35793c7673007e6));
+
+#define LN2_SIG        BX_CONST64(0xb17217f7d1cf79ac)
+
+#define EXP_ARR_SIZE 15
+
+static float128 exp_arr[EXP_ARR_SIZE] =
+{
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000)), /*  1 */
+    packFloat128(BX_CONST64(0x3ffe000000000000), BX_CONST64(0x0000000000000000)), /*  2 */
+    packFloat128(BX_CONST64(0x3ffc555555555555), BX_CONST64(0x5555555555555555)), /*  3 */
+    packFloat128(BX_CONST64(0x3ffa555555555555), BX_CONST64(0x5555555555555555)), /*  4 */
+    packFloat128(BX_CONST64(0x3ff8111111111111), BX_CONST64(0x1111111111111111)), /*  5 */
+    packFloat128(BX_CONST64(0x3ff56c16c16c16c1), BX_CONST64(0x6c16c16c16c16c17)), /*  6 */
+    packFloat128(BX_CONST64(0x3ff2a01a01a01a01), BX_CONST64(0xa01a01a01a01a01a)), /*  7 */
+    packFloat128(BX_CONST64(0x3fefa01a01a01a01), BX_CONST64(0xa01a01a01a01a01a)), /*  8 */
+    packFloat128(BX_CONST64(0x3fec71de3a556c73), BX_CONST64(0x38faac1c88e50017)), /*  9 */
+    packFloat128(BX_CONST64(0x3fe927e4fb7789f5), BX_CONST64(0xc72ef016d3ea6679)), /* 10 */
+    packFloat128(BX_CONST64(0x3fe5ae64567f544e), BX_CONST64(0x38fe747e4b837dc7)), /* 11 */
+    packFloat128(BX_CONST64(0x3fe21eed8eff8d89), BX_CONST64(0x7b544da987acfe85)), /* 12 */
+    packFloat128(BX_CONST64(0x3fde6124613a86d0), BX_CONST64(0x97ca38331d23af68)), /* 13 */
+    packFloat128(BX_CONST64(0x3fda93974a8c07c9), BX_CONST64(0xd20badf145dfa3e5)), /* 14 */
+    packFloat128(BX_CONST64(0x3fd6ae7f3e733b81), BX_CONST64(0xf11d8656b0ee8cb0))  /* 15 */
+};
+
+extern float128 EvalPoly(float128 x, float128 *arr, unsigned n, float_status_t &status);
+
+/* required -1 < x < 1 */
+static float128 poly_exp(float128 x, float_status_t &status)
+{
+/*
+    //               2     3     4     5     6     7     8     9
+    //  x           x     x     x     x     x     x     x     x
+    // e - 1 ~ x + --- + --- + --- + --- + --- + --- + --- + --- + ...
+    //              2!    3!    4!    5!    6!    7!    8!    9!
+    //
+    //                     2     3     4     5     6     7     8
+    //              x     x     x     x     x     x     x     x
+    //   = x [ 1 + --- + --- + --- + --- + --- + --- + --- + --- + ... ]
+    //              2!    3!    4!    5!    6!    7!    8!    9!
+    //
+    //           8                          8
+    //          --       2k                --        2k+1
+    //   p(x) = >  C  * x           q(x) = >  C   * x
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //    x                     
+    //   e  - 1 ~ x * [ p(x) + x * q(x) ]
+    //
+*/
+    float128 t = EvalPoly(x, exp_arr, EXP_ARR_SIZE, status);
+    return float128_mul(t, x, status);
+}
+
+// =================================================
+//                                  x
+// FX2P1                   Compute 2  - 1
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//      x    x*ln(2)
+//     2  = e
+//
+// 2. ----------------------------------------------------------
+//                      2     3     4     5           n
+//      x        x     x     x     x     x           x
+//     e  = 1 + --- + --- + --- + --- + --- + ... + --- + ...
+//               1!    2!    3!    4!    5!          n!
+//
+
+floatx80 f2xm1(floatx80 a, float_status_t &status)
+{
+    Bit64u zSig0, zSig1;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) 
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+     
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1))
+            return propagateFloatx80NaN(a, status);
+
+        return (aSign) ? floatx80_negone : a;
+    }
+
+    if (aExp == 0) {
+        if (aSig == 0) return a;
+        float_raise(status, float_flag_denormal | float_flag_inexact);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+
+    tiny_argument:
+        mul64To128(aSig, LN2_SIG, &zSig0, &zSig1);
+        if (0 < (Bit64s) zSig0) {
+            shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+            --aExp;
+        }
+        return
+            roundAndPackFloatx80(80, aSign, aExp, zSig0, zSig1, status);
+    }
+
+    float_raise(status, float_flag_inexact);
+
+    if (aExp < 0x3FFF)
+    {
+        if (aExp < EXP_BIAS-68)
+            goto tiny_argument;
+
+        /* ******************************** */
+        /* using float128 for approximation */
+        /* ******************************** */
+
+        float128 x = floatx80_to_float128(a, status);
+        x = float128_mul(x, float128_ln2, status);
+        x = poly_exp(x, status);
+        return float128_to_floatx80(x, status);
+    }
+    else 
+    {
+        if ((a.exp == 0xBFFF) && (! (aSig<<1)))
+           return floatx80_neghalf;
+
+        return a;
+    }
+}
--- a/bochs/fpu/ferr.cc
+++ b/bochs/fpu/ferr.cc
@ -0,0 +1,86 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+
+#include "softfloat-specialize.h"
+
+void BX_CPU_C::FPU_stack_overflow(void)
+{
+  /* The masked response */
+  if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+  {
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+  }
+  FPU_exception(FPU_EX_Stack_Overflow);
+}
+
+void BX_CPU_C::FPU_stack_underflow(int stnr, int pop_stack)
+{
+  /* The masked response */
+  if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+  {
+     BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, stnr);
+     if (pop_stack) 
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+  }
+  FPU_exception(FPU_EX_Stack_Underflow);
+}
+
+/* Returns 1 if unmasked exception occured */
+int BX_CPU_C::FPU_exception(int exception)
+{
+  int unmasked = 0;
+
+  /* Extract only the bits which we use to set the status word */
+  exception &= (FPU_SW_Exceptions_Mask);
+
+  /* Set the corresponding exception bits */
+  FPU_PARTIAL_STATUS |= exception;
+
+  /* Set summary bits iff exception isn't masked */
+  if (FPU_PARTIAL_STATUS & ~FPU_CONTROL_WORD & FPU_CW_Exceptions_Mask)
+  {
+      FPU_PARTIAL_STATUS |= (FPU_SW_Summary | FPU_SW_Backward);
+      unmasked = 1;
+  }
+
+  if (exception & (FPU_SW_Stack_Fault | FPU_EX_Precision))
+  {
+      if (! (exception & FPU_SW_C1))
+        /* This bit distinguishes over- from underflow for a stack fault,
+             and roundup from round-down for precision loss. */
+        FPU_PARTIAL_STATUS &= ~FPU_SW_C1;
+  }
+
+  return unmasked;
+}
+
+#endif
--- a/bochs/fpu/fpatan.cc
+++ b/bochs/fpu/fpatan.cc
@ -0,0 +1,285 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "fpu_constant.h"
+
+#define FPATAN_ARR_SIZE 11
+
+static const float128 float128_one = 
+	packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
+static const float128 float128_sqrt3 =
+	packFloat128(BX_CONST64(0x3fffbb67ae8584ca), BX_CONST64(0xa73b25742d7078b8));
+static const floatx80 floatx80_pi  = 
+	packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
+static const floatx80 floatx80_pi2 = 
+	packFloatx80(0, 0x3fff, BX_CONST64(0xc90fdaa22168c235));
+static const floatx80 floatx80_pi4 = 
+	packFloatx80(0, 0x3ffe, BX_CONST64(0xc90fdaa22168c235));
+static const floatx80 floatx80_pi6 =
+	packFloatx80(0, 0x3ffe, BX_CONST64(0x860a91c16b9b2c23));
+
+static float128 atan_arr[FPATAN_ARR_SIZE] =
+{
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000)), /*  1 */
+    packFloat128(BX_CONST64(0xbffd555555555555), BX_CONST64(0x5555555555555555)), /*  3 */
+    packFloat128(BX_CONST64(0x3ffc999999999999), BX_CONST64(0x999999999999999a)), /*  5 */
+    packFloat128(BX_CONST64(0xbffc249249249249), BX_CONST64(0x2492492492492492)), /*  7 */
+    packFloat128(BX_CONST64(0x3ffbc71c71c71c71), BX_CONST64(0xc71c71c71c71c71c)), /*  9 */
+    packFloat128(BX_CONST64(0xbffb745d1745d174), BX_CONST64(0x5d1745d1745d1746)), /* 11 */
+    packFloat128(BX_CONST64(0x3ffb3b13b13b13b1), BX_CONST64(0x3b13b13b13b13b14)), /* 13 */
+    packFloat128(BX_CONST64(0xbffb111111111111), BX_CONST64(0x1111111111111111)), /* 15 */
+    packFloat128(BX_CONST64(0x3ffae1e1e1e1e1e1), BX_CONST64(0xe1e1e1e1e1e1e1e2)), /* 17 */
+    packFloat128(BX_CONST64(0xbffaaf286bca1af2), BX_CONST64(0x86bca1af286bca1b)), /* 19 */
+    packFloat128(BX_CONST64(0x3ffa861861861861), BX_CONST64(0x8618618618618618))  /* 21 */
+};
+
+extern float128 OddPoly(float128 x, float128 *arr, unsigned n, float_status_t &status);
+
+/* |x| < 1/4 */
+static float128 poly_atan(float128 x1, float_status_t &status)
+{
+/*
+    //                 3     5     7     9     11     13     15     17
+    //                x     x     x     x     x      x      x      x
+    // atan(x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- + ----
+    //                3     5     7     9     11     13     15     17
+    //
+    //                 2     4     6     8     10     12     14     16
+    //                x     x     x     x     x      x      x      x
+    //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- + ---- ]
+    //                3     5     7     9     11     13     15     17
+    //
+    //           5                          5
+    //          --       4k                --        4k+2
+    //   p(x) = >  C  * x           q(x) = >  C   * x
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //                            2
+    //    atan(x) ~ x * [ p(x) + x * q(x) ]
+    //
+*/
+    return OddPoly(x1, atan_arr, FPATAN_ARR_SIZE, status);
+}
+
+// =================================================
+// FPATAN                  Compute y * log (x)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//   atan(-x) = -atan(x)
+//
+// 2. ----------------------------------------------------------
+//
+//                             x + y
+//   atan(x) + atan(y) = atan -------, xy < 1
+//                             1-xy
+//
+//                             x + y
+//   atan(x) + atan(y) = atan ------- + PI, x > 0, xy > 1
+//                             1-xy
+//
+//                             x + y
+//   atan(x) + atan(y) = atan ------- - PI, x < 0, xy > 1
+//                             1-xy
+//
+// 3. ----------------------------------------------------------
+//
+//   atan(x) = atan(INF) + atan(- 1/x)
+//
+//                           x-1
+//   atan(x) = PI/4 + atan( ----- )
+//                           x+1
+//
+//                           x * sqrt(3) - 1 
+//   atan(x) = PI/6 + atan( ----------------- )
+//                             x + sqrt(3)
+//
+// 4. ----------------------------------------------------------
+//                   3     5     7     9                 2n+1
+//                  x     x     x     x              n  x
+//   atan(x) = x - --- + --- - --- + --- - ... + (-1)  ------ + ...
+//                  3     5     7     9                 2n+1
+//
+
+floatx80 fpatan(floatx80 a, floatx80 b, float_status_t &status)
+{
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    Bit64u bSig = extractFloatx80Frac(b);
+    Bit32s bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    int zSign = aSign ^ bSign;
+     
+    if (bExp == 0x7FFF)
+    {
+        if ((Bit64u) (bSig<<1))
+            return propagateFloatx80NaN(a, b, status);
+
+        if (aExp == 0x7FFF) {
+            if ((Bit64u) (aSig<<1))
+                return propagateFloatx80NaN(a, b, status);
+
+            if (aSign) {   /* return 3PI/4 */
+                return roundAndPackFloatx80(80, bSign, 
+                        FLOATX80_3PI4_EXP, FLOAT_3PI4_HI, FLOAT_3PI4_LO, status);
+            }                        
+            else {         /* return  PI/4 */
+                return roundAndPackFloatx80(80, bSign, 
+                        FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+            }
+        }
+
+        if (aSig && (aExp == 0)) 
+            float_raise(status, float_flag_denormal);
+
+        /* return PI/2 */
+        return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+    }
+    if (aExp == 0x7FFF)
+    {
+        if ((Bit64u) (aSig<<1)) 
+            return propagateFloatx80NaN(a, b, status);
+
+        if (bSig && (bExp == 0)) 
+            float_raise(status, float_flag_denormal);
+
+return_PI_or_ZERO:
+
+        if (aSign) {   /* return PI */
+            return roundAndPackFloatx80(80, bSign, FLOATX80_PI_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+        } else {       /* return  0 */
+            return packFloatx80(bSign, 0, 0);
+        }
+    }
+    if (bExp == 0)
+    {
+        if (bSig == 0) {
+             if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+             goto return_PI_or_ZERO;
+        }
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0)
+    {
+        if (aSig == 0)   /* return PI/2 */
+            return roundAndPackFloatx80(80, bSign, FLOATX80_PI2_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+
+    float_raise(status, float_flag_inexact);
+
+    /* |a| = |b| ==> return PI/4 */
+    if (aSig == bSig && aExp == bExp)
+        return roundAndPackFloatx80(80, bSign, FLOATX80_PI4_EXP, FLOAT_PI_HI, FLOAT_PI_LO, status);
+         
+    /* ******************************** */
+    /* using float128 for approximation */
+    /* ******************************** */
+
+    float128 a128 = normalizeRoundAndPackFloat128(0, aExp-0x10, aSig, 0, status);
+    float128 b128 = normalizeRoundAndPackFloat128(0, bExp-0x10, bSig, 0, status);
+    float128 x;
+    int swap = 0, add_pi6 = 0, add_pi4 = 0;
+
+    if (aExp > bExp || (aExp == bExp && aSig > bSig))
+    {
+        x = float128_div(b128, a128, status);
+    }
+    else {
+	x = float128_div(a128, b128, status);
+	swap = 1;
+    }
+
+    Bit32s xExp = extractFloat128Exp(x);
+    floatx80 result;
+
+    if (xExp <= EXP_BIAS-40) {
+        result = float128_to_floatx80(x, status);
+        goto approximation_completed;
+    }
+
+    if (x.hi >= BX_CONST64(0x3ffe800000000000))	// 3/4 < x < 1
+    {
+        /* 
+        arctan(x) = arctan((x-1)/(x+1)) + pi/4
+        */
+        float128 t1 = float128_sub(x, float128_one, status);
+        float128 t2 = float128_add(x, float128_one, status);
+        x = float128_div(t1, t2, status);
+        add_pi4 = 1;
+    }
+    else
+    {
+        /* argument correction */
+        if (xExp >= 0x3FFD)                     // 1/4 < x < 3/4 
+        {			                
+            /* 
+            arctan(x) = arctan((x*sqrt(3)-1)/(x+sqrt(3))) + pi/6
+            */
+            float128 t1 = float128_mul(x, float128_sqrt3, status);
+            float128 t2 = float128_add(x, float128_sqrt3, status);
+            x = float128_sub(t1, float128_one, status);
+            x = float128_div(x, t2, status);
+            add_pi6 = 1;
+        }
+    }
+
+    x = poly_atan(x, status);
+    result = float128_to_floatx80(x, status);
+    if (add_pi6) result = floatx80_add(result, floatx80_pi6, status);
+    if (add_pi4) result = floatx80_add(result, floatx80_pi4, status);
+
+approximation_completed:
+    if (swap) result = floatx80_sub(floatx80_pi2, result, status);
+    if (zSign) floatx80_chs(result);
+    int rSign = extractFloatx80Sign(result);
+    if (!bSign && rSign)
+        return floatx80_add(result, floatx80_pi, status);
+    if (bSign && !rSign)
+        return floatx80_sub(result, floatx80_pi, status);
+    return result;
+}
--- a/bochs/fpu/fprem.cc
+++ b/bochs/fpu/fprem.cc
@ -0,0 +1,168 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "softfloat-macros.h"
+
+/* executes single exponent reduction cycle */
+static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
+{
+    Bit64u term0, term1;
+    Bit64u aSig1 = 0;
+
+    shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0);
+    Bit64u q = estimateDiv128To64(aSig1, aSig0, bSig);
+    mul64To128(bSig, q, &term0, &term1);
+    sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+    while ((Bit64s)(*zSig1) < 0) {
+        --q;
+        add128(*zSig1, *zSig0, 0, bSig, zSig1, zSig0);
+    }
+    return q;
+}
+
+static floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u &q, int rounding_mode, float_status_t &status)
+{
+    Bit32s aExp, bExp, zExp, expDiff;
+    Bit64u aSig0, aSig1, bSig;
+    int aSign;
+    q = 0;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig0<<1)
+             || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        goto invalid;
+    }
+    if (bExp == 0x7FFF) {
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        return a;
+    }
+    if (bExp == 0) {
+        if (bSig == 0) {
+ invalid:
+            float_raise(status, float_flag_invalid);
+            return floatx80_default_nan;
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) {
+        if ((Bit64u) (aSig0<<1) == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+
+    if (expDiff >= 64) {
+        int n = (expDiff & 0x1f) | 0x20;
+        remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
+        zExp = aExp - n;
+        q = (Bit64u) -1;
+    }
+    else {
+        zExp = bExp;
+
+        if (expDiff < 0) {
+            if (expDiff < -1)
+               return (a.fraction & BX_CONST64(0x8000000000000000)) ? 
+                    packFloatx80(aSign, aExp, aSig0) : a;
+            shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
+            expDiff = 0;
+        }
+
+        if (expDiff > 0) {
+            q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
+        }
+        else {
+            if (bSig <= aSig0) {
+               aSig0 -= bSig;
+               q = 1;
+            }
+        }
+
+        if (rounding_mode == float_round_nearest_even)
+        {
+            Bit64u term0, term1;
+            shift128Right(bSig, 0, 1, &term0, &term1);
+
+            if (! lt128(aSig0, aSig1, term0, term1))
+            {
+               int lt = lt128(term0, term1, aSig0, aSig1);
+               int eq = eq128(aSig0, aSig1, term0, term1);
+                
+               if ((eq && (q & 1)) || lt) {
+                  aSign = !aSign;
+                  ++q;
+               }
+               if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);
+            }
+        }
+    }
+
+    return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_ieee754_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status)
+{
+    return do_fprem(a, b, q, float_round_nearest_even, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with  respect to  the corresponding value `b'. Unlike previous function
+| the  function  does not compute  the remainder  specified  in  the IEC/IEEE 
+| Standard  for Binary  Floating-Point  Arithmetic.  This  function  operates
+| differently  from the  previous  function in  the way  that it  rounds  the 
+| quotient of 'a' divided by 'b' to an integer.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status)
+{
+    return do_fprem(a, b, q, float_round_to_zero, status);
+}
--- a/bochs/fpu/fpu_arith.c
+++ b/bochs/fpu/fpu_arith.c
@ -1,154 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_arith.c                                                              |
- |  $Id: fpu_arith.c,v 1.5 2003-11-01 18:36:19 sshwarts Exp $
- |                                                                           |
- | Code to implement the FPU register/register arithmetic instructions       |
- |                                                                           |
- | Copyright (C) 1992,1993,1997                                              |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "fpu_emu.h"
-#include "control_w.h"
-#include "status_w.h"
-
-
-void fadd__()
-{
-  /* fadd st,st(i) */
-  int i = FPU_rm;
-  clear_C1();
-  FPU_add(&st(i), FPU_gettagi(i), 0, FPU_control_word);
-}
-
-void fmul__()
-{
-  /* fmul st,st(i) */
-  int i = FPU_rm;
-  clear_C1();
-  FPU_mul(&st(i), FPU_gettagi(i), 0, FPU_control_word);
-}
-
-void fsub__()
-{
-  /* fsub st,st(i) */
-  clear_C1();
-  FPU_sub(0, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void fsubr_()
-{
-  /* fsubr st,st(i) */
-  clear_C1();
-  FPU_sub(REV, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void fdiv__()
-{
-  /* fdiv st,st(i) */
-  clear_C1();
-  FPU_div(0, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void fdivr_()
-{
-  /* fdivr st,st(i) */
-  clear_C1();
-  FPU_div(REV, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void fadd_i()
-{
-  /* fadd st(i),st */
-  int i = FPU_rm;
-  clear_C1();
-  FPU_add(&st(i), FPU_gettagi(i), i, FPU_control_word);
-}
-
-void fmul_i()
-{
-  /* fmul st(i),st */
-  clear_C1();
-  FPU_mul(&st(0), FPU_gettag0(), FPU_rm, FPU_control_word);
-}
-
-void fsubri()
-{
-  /* fsubr st(i),st */
-  clear_C1();
-  FPU_sub(DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void fsub_i()
-{
-  /* fsub st(i),st */
-  clear_C1();
-  FPU_sub(REV|DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void fdivri()
-{
-  /* fdivr st(i),st */
-  clear_C1();
-  FPU_div(DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void fdiv_i()
-{
-  /* fdiv st(i),st */
-  clear_C1();
-  FPU_div(REV|DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word);
-}
-
-void faddp_()
-{
-  /* faddp st(i),st */
-  int i = FPU_rm;
-  clear_C1();
-  if ( FPU_add(&st(i), FPU_gettagi(i), i, FPU_control_word) >= 0 )
-    FPU_pop();
-}
-
-void fmulp_()
-{
-  /* fmulp st(i),st */
-  clear_C1();
-  if ( FPU_mul(&st(0), FPU_gettag0(), FPU_rm, FPU_control_word) >= 0 )
-    FPU_pop();
-}
-
-void fsubrp()
-{
-  /* fsubrp st(i),st */
-  clear_C1();
-  if ( FPU_sub(DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word) >= 0 )
-    FPU_pop();
-}
-
-void fsubp_()
-{
-  /* fsubp st(i),st */
-  clear_C1();
-  if ( FPU_sub(REV|DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word) >= 0 )
-    FPU_pop();
-}
-
-void fdivrp()
-{
-  /* fdivrp st(i),st */
-  clear_C1();
-  if ( FPU_div(DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word) >= 0 )
-    FPU_pop();
-}
-
-void fdivp_()
-{
-  /* fdivp st(i),st */
-  clear_C1();
-  if ( FPU_div(REV|DEST_RM, REGNO2PTR(FPU_rm), FPU_control_word) >= 0 )
-    FPU_pop();
-}
--- a/bochs/fpu/fpu_arith.cc
+++ b/bochs/fpu/fpu_arith.cc
--- a/bochs/fpu/fpu_aux.c
+++ b/bochs/fpu/fpu_aux.c
@ -1,190 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_aux.c                                                                |
- |  $Id: fpu_aux.c,v 1.7 2003-11-01 18:36:19 sshwarts Exp $
- |                                                                           |
- | Code to implement some of the FPU auxiliary instructions.                 |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "exception.h"
-#include "fpu_emu.h"
-#include "status_w.h"
-#include "control_w.h"
-
-static void fnop(void) { }
-
-void fclex(void)
-{
-  FPU_partial_status &= ~(SW_Backward|SW_Summary|SW_Stack_Fault|SW_Precision|
-		   SW_Underflow|SW_Overflow|SW_Zero_Div|SW_Denorm_Op|
-		   SW_Invalid);
-  no_ip_update = 1;
-}
-
-/* Needs to be externally visible */
-void finit()
-{
-  FPU_control_word = 0x037f;
-  FPU_partial_status = 0;
-  FPU_tos = 0;            /* We don't keep top in the status word internally. */
-  FPU_tag_word = 0xffff;
-  /* The behaviour is different from that detailed in
-     Section 15.1.6 of the Intel manual */
-  FPU_operand_address.offset = 0;
-  FPU_operand_address.selector = 0;
-  FPU_instruction_address.offset = 0;
-  FPU_instruction_address.selector = 0;
-  FPU_instruction_address.opcode = 0;
-  no_ip_update = 1;
-}
-
-/*
- * These are nops on the i387..
- */
-#define feni fnop
-#define fdisi fnop
-#define fsetpm fnop
-
-static FUNC const finit_table[] = {
-  feni, fdisi, fclex, finit,
-  fsetpm, FPU_illegal, FPU_illegal, FPU_illegal
-};
-
-void finit_()
-{
-  (finit_table[FPU_rm])();
-}
-
-static void fstsw_ax(void)
-{
-  fpu_set_ax(status_word());
-  no_ip_update = 1;
-}
-
-static FUNC const fstsw_table[] = {
-  fstsw_ax, FPU_illegal, FPU_illegal, FPU_illegal,
-  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
-};
-
-void fstsw_()
-{
-  (fstsw_table[FPU_rm])();
-}
-
-
-static FUNC const fp_nop_table[] = {
-  fnop, FPU_illegal, FPU_illegal, FPU_illegal,
-  FPU_illegal, FPU_illegal, FPU_illegal, FPU_illegal
-};
-
-void fp_nop()
-{
-  (fp_nop_table[FPU_rm])();
-}
-
-void fld_i_()
-{
-  FPU_REG *st_new_ptr;
-  int i;
-  u_char tag;
-
-  if (FPU_stackoverflow(&st_new_ptr))
-    { FPU_stack_overflow(); 
-      return; 
-    }
-
-  /* fld st(i) */
-  i = FPU_rm;
-  if ( NOT_EMPTY(i) )
-    {
-      reg_copy(&st(i), st_new_ptr);
-      tag = FPU_gettagi(i);
-      FPU_push();
-      FPU_settag0(tag);
-    }
-  else
-    {
-      if ( FPU_control_word & CW_Invalid )
-	{
-	  /* The masked response */
-	  FPU_stack_underflow();
-	}
-      else
-	EXCEPTION(EX_StackUnder);
-    }
-
-}
-
-void fxch_i()
-{
-  /* fxch st(i) */
-  FPU_REG t;
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  s32 tag_word = FPU_tag_word;
-  int regnr = FPU_tos & 7, regnri = ((regnr + i) & 7);
-  u_char st0_tag = (tag_word >> (regnr*2)) & 3;
-  u_char sti_tag = (tag_word >> (regnri*2)) & 3;
-
-  clear_C1();
-
-  if ( st0_tag == TAG_Empty )
-    {
-      if ( sti_tag == TAG_Empty )
-	{
-	  FPU_stack_underflow();
-	  FPU_stack_underflow_i(i);
-	  return;
-	}
-      if ( FPU_control_word & CW_Invalid )
-	{
-	  /* Masked response */
-	  FPU_copy_to_reg0(sti_ptr, sti_tag);
-	}
-      FPU_stack_underflow_i(i);
-      return;
-    }
-  if ( sti_tag == TAG_Empty )
-    {
-      if ( FPU_control_word & CW_Invalid )
-	{
-	  /* Masked response */
-	  FPU_copy_to_regi(st0_ptr, st0_tag, i);
-	}
-      FPU_stack_underflow();
-      return;
-    }
-
-  reg_copy(st0_ptr, &t);
-  reg_copy(sti_ptr, st0_ptr);
-  reg_copy(&t, sti_ptr);
-
-  tag_word &= ~(3 << (regnr*2)) & ~(3 << (regnri*2));
-  tag_word |= (sti_tag << (regnr*2)) | (st0_tag << (regnri*2));
-  FPU_tag_word = tag_word;
-}
-
-void ffree_()
-{
-  /* ffree st(i) */
-  FPU_settagi(FPU_rm, TAG_Empty);
-}
-
-void fst_i_()
-{
-  /* fst st(i) */
-  FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
-}
-
-void fstp_i()
-{
-  /* fstp st(i) */
-  FPU_copy_to_regi(&st(0), FPU_gettag0(), FPU_rm);
-  FPU_pop();
-}
--- a/bochs/fpu/fpu_compare.c
+++ b/bochs/fpu/fpu_compare.c
@ -1,184 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_compare.c                                                            |
- |  $Id: fpu_compare.c,v 1.2 2003-11-01 18:45:16 sshwarts Exp $
- |                                                                           |
- | Code to implement FMOVcc and other P6 FPU instructions.                   |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "exception.h"
-#include "fpu_emu.h"
-#include "status_w.h"
-
-  /* EFLAGS: */
-
-  /* 31|30|29|28|27|26|25|24|23|22|21|20|19|18|17|16
-   * ==|==|=====|==|==|==|==|==|==|==|==|==|==|==|==
-   *  0| 0| 0| 0| 0| 0| 0| 0| 0| 0|ID|VP|VF|AC|VM|RF
-   *
-   * 15|14|13|12|11|10| 9| 8| 7| 6| 5| 4| 3| 2| 1| 0
-   * ==|==|=====|==|==|==|==|==|==|==|==|==|==|==|==
-   *  0|NT| IOPL|OF|DF|IF|TF|SF|ZF| 0|AF| 0|PF| 1|CF
-   */
-
-#define EFLAGS_CF 0x01
-#define EFLAGS_PF 0x04
-#define EFLAGS_AF 0x10
-#define EFLAGS_ZF 0x40
-
-void FPU_fcmovb()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (eflags & EFLAGS_CF)
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fcmove()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (eflags & EFLAGS_ZF)
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fcmovbe()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (eflags & (EFLAGS_ZF | EFLAGS_CF))
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fcmovu()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (eflags & EFLAGS_PF)
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fcmovnb()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (!(eflags & EFLAGS_CF))
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fcmovne()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (!(eflags & EFLAGS_ZF))
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fcmovnbe()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (!(eflags & EFLAGS_CF) && !(eflags & EFLAGS_ZF))
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fcmovnu()
-{
-  int i = FPU_rm;
-  FPU_REG *st0_ptr = &st(0), *sti_ptr = &st(i);
-  u_char st0_tag = FPU_gettagi(0);
-  u_char sti_tag = FPU_gettagi(i);
-  u32 eflags = fpu_get_eflags();
-
-  if (st0_tag == TAG_Empty || sti_tag == TAG_Empty) {
-        clear_C1();
-        FPU_stack_underflow();
-  }
-
-  if (!(eflags & EFLAGS_PF))
-        FPU_copy_to_reg0(sti_ptr, sti_tag);
-}
-
-void FPU_fucomip()
-{
-    printk("WARNING: FUCOMIP instruction still not implemented");
-}
-
-void FPU_fcomip()
-{
-    printk("WARNING: FCOMIP instruction still not implemented");
-}
-
-void FPU_fucomi()
-{
-    printk("WARNING: FUCOMI instruction still not implemented");
-}
-
-void FPU_fcomi()
-{
-    printk("WARNING: FCOMI instruction still not implemented");
-}
--- a/bochs/fpu/fpu_compare.cc
+++ b/bochs/fpu/fpu_compare.cc
@ -0,0 +1,629 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+extern float_status_t FPU_pre_exception_handling(Bit16u control_word);
+
+#if BX_SUPPORT_FPU
+
+#include "softfloatx80.h"
+
+static int status_word_flags_fpu_compare(int float_relation)
+{
+  switch(float_relation) {
+     case float_relation_unordered:
+         return (FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+
+     case float_relation_greater:
+         return (0);
+
+     case float_relation_less:
+         return (FPU_SW_C0);
+
+     case float_relation_equal:
+         return (FPU_SW_C3);
+  }
+
+  return (-1);	// should never get here
+}
+#endif
+
+#if BX_SUPPORT_FPU || BX_SUPPORT_SSE >= 1
+void BX_CPU_C::write_eflags_fpu_compare(int float_relation)
+{
+  switch(float_relation) {
+   case float_relation_unordered:
+      setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
+      break;
+
+   case float_relation_greater:
+      setEFlagsOSZAPC(0);
+      break;
+
+   case float_relation_less:
+      setEFlagsOSZAPC(EFlagsCFMask);
+      break;
+
+   case float_relation_equal:
+      setEFlagsOSZAPC(EFlagsZFMask);
+      break;
+
+   default:
+      BX_PANIC(("write_eflags: unknown floating point compare relation"));
+  }
+}
+#endif
+
+void BX_CPU_C::FCOM_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FCOM(P)_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FCOMI_ST0_STj(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->b1() & 4;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FCOMI(P)_ST0_STj: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FUCOMI_ST0_STj(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->b1() & 4;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare_quiet(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_CPU_THIS_PTR write_eflags_fpu_compare(rc);
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FUCOMI(P)_ST0_STj: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FUCOM_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare_quiet(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FUCOM(P)_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FCOM_SINGLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float32 load_reg;
+  read_virtual_dword(i->seg(), RMAddr(i), &load_reg);
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), 
+  	float32_to_floatx80(load_reg, status), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FCOM(P)_SINGLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FCOM_DOUBLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float64 load_reg;
+  read_virtual_qword(i->seg(), RMAddr(i), &load_reg);
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), 
+  	float64_to_floatx80(load_reg, status), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FCOM(P)_DOUBLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FICOM_WORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  Bit16s load_reg;
+  read_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&load_reg));
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), 
+  	int32_to_floatx80((Bit32s)(load_reg)), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FICOM(P)_WORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FICOM_DWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (pop_stack)
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  Bit32s load_reg;
+  read_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&load_reg));
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), 
+  	int32_to_floatx80(load_reg), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  if (pop_stack)
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FICOM(P)_DWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DE D9 */
+void BX_CPU_C::FCOMPP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FCOMPP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DA E9 */
+void BX_CPU_C::FUCOMPP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare_quiet(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FUCOMPP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FCMOV_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int st0_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  int sti_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (st0_tag == FPU_Tag_Empty || sti_tag == FPU_Tag_Empty)
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  floatx80 sti_reg = BX_READ_FPU_REG(i->rm());
+
+  bx_bool condition = 0;
+  switch(i->nnn())
+  {
+     case 0: condition = get_CF(); break;
+     case 1: condition = get_ZF(); break;
+     case 2: condition = get_CF() || get_ZF(); break;
+     case 3: condition = get_PF(); break;
+     default:
+        BX_PANIC(("FCMOV_ST0_STj: default case"));
+  }
+  if (i->b1() & 1) 
+     condition = !condition;
+
+  if (condition)
+     BX_WRITE_FPU_REGISTER_AND_TAG(sti_reg, sti_tag, 0);
+ 
+#else
+  BX_INFO(("FCMOV_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* D9 E4 */
+void BX_CPU_C::FTST(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          /* the masked response */
+          setcc(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+      }
+      return;
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  int rc = floatx80_compare(BX_READ_FPU_REG(0), Const_Z, status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  setcc(status_word_flags_fpu_compare(rc));
+#else
+  BX_INFO(("FTST: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 E5 */
+void BX_CPU_C::FXAM(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  floatx80 reg = BX_READ_FPU_REG(0);
+  int sign = floatx80_sign(reg);
+
+  /* 
+   * Examine the contents of the ST(0) register and sets the condition 
+   * code flags C0, C2 and C3 in the FPU status word to indicate the 
+   * class of value or number in the register.
+   */
+
+  if (IS_TAG_EMPTY(0))
+  {
+      setcc(FPU_SW_C3|FPU_SW_C1|FPU_SW_C0);
+  }
+  else
+  {
+      float_class_t aClass = floatx80_class(reg);
+
+      switch(aClass)
+      {
+        case float_zero:
+           setcc(FPU_SW_C3|FPU_SW_C1);
+           break;
+       
+        case float_NaN:
+           // unsupported handled as NaNs
+           if (floatx80_is_unsupported(reg)) {
+               setcc(FPU_SW_C1); 
+           } else {
+               setcc(FPU_SW_C1|FPU_SW_C0);
+           }
+           break;
+       
+        case float_negative_inf:
+        case float_positive_inf:
+           setcc(FPU_SW_C2|FPU_SW_C1|FPU_SW_C0);
+           break;
+       
+        case float_denormal:
+           setcc(FPU_SW_C3|FPU_SW_C2|FPU_SW_C1);
+           break;
+       
+        case float_normalized:
+           setcc(FPU_SW_C2|FPU_SW_C1);
+           break;
+      }
+  }
+
+  /* 
+   * The C1 flag is set to the sign of the value in ST(0), regardless 
+   * of whether the register is empty or full.
+   */
+  if (! sign)
+    clear_C1();
+
+#else
+  BX_INFO(("FXAM: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/fpu_const.cc
+++ b/bochs/fpu/fpu_const.cc
@ -0,0 +1,202 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+
+#include "softfloatx80.h"
+
+const floatx80 Const_QNaN = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+const floatx80 Const_Z    = packFloatx80(0, 0x0000, 0);
+const floatx80 Const_1    = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
+const floatx80 Const_L2T  = packFloatx80(0, 0x4000, BX_CONST64(0xd49a784bcd1b8afe));
+const floatx80 Const_L2E  = packFloatx80(0, 0x3fff, BX_CONST64(0xb8aa3b295c17f0bc));
+const floatx80 Const_PI   = packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
+const floatx80 Const_LG2  = packFloatx80(0, 0x3ffd, BX_CONST64(0x9a209a84fbcff799));
+const floatx80 Const_LN2  = packFloatx80(0, 0x3ffe, BX_CONST64(0xb17217f7d1cf79ac));
+const floatx80 Const_INF  = packFloatx80(0, 0x7fff, BX_CONST64(0x8000000000000000));
+
+/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
+   (and not one of RC_RND or RC_UP).
+   */
+#define DOWN_OR_CHOP()  (FPU_CONTROL_WORD & FPU_CW_RC & FPU_RC_DOWN)
+
+BX_CPP_INLINE floatx80 FPU_round_const(const floatx80 &a, int adj)
+{
+  floatx80 result = a;
+  result.fraction += adj;
+  return result;
+}
+
+#endif
+
+void BX_CPU_C::FLDL2T(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_L2T, 
+	(FPU_CONTROL_WORD == FPU_RC_UP) ? 1 : 0), FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FLDL2T: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLDL2E(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_L2E, 
+	DOWN_OR_CHOP() ? -1 : 0), FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FLDL2E: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLDPI(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_PI, 
+	DOWN_OR_CHOP() ? -1 : 0), FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FLDPI: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLDLG2(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_LG2, 
+	DOWN_OR_CHOP() ? -1 : 0), FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FLDLG2: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLDLN2(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_LN2, 
+	DOWN_OR_CHOP() ? -1 : 0), FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FLDLN2: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLD1(bxInstruction_c *i) 
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(Const_1, FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FLD1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLDZ(bxInstruction_c *i)                      
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(Const_Z, FPU_Tag_Zero, 0);
+#else
+  BX_INFO(("FLDZ: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/fpu_constant.h
+++ b/bochs/fpu/fpu_constant.h
@ -0,0 +1,67 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _FPU_CONSTANTS_H_
+#define _FPU_CONSTANTS_H_
+
+#include <config.h>
+
+// Pentium CPU uses only 68-bit precision M_PI approximation
+// #define BETTER_THAN_PENTIUM
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+//////////////////////////////
+// PI, PI/2, PI/4 constants
+//////////////////////////////
+
+#define FLOATX80_PI_EXP  (0x4000)
+
+// 128-bit PI fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_PI_HI (BX_CONST64(0xC90FDAA22168C234))
+#define FLOAT_PI_LO (BX_CONST64(0xC4C6628B80DC1CD1))
+#else
+#define FLOAT_PI_HI (BX_CONST64(0xC90FDAA22168C234))
+#define FLOAT_PI_LO (BX_CONST64(0xC000000000000000))
+#endif
+
+#define FLOATX80_PI2_EXP  (0x3FFF)
+#define FLOATX80_PI4_EXP  (0x3FFE)
+
+//////////////////////////////
+// 3PI/4 constant
+//////////////////////////////
+
+#define FLOATX80_3PI4_EXP (0x4000)
+
+// 128-bit 3PI/4 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
+#define FLOAT_3PI4_LO (BX_CONST64(0x9394C9E8A0A5159C))
+#else
+#define FLOAT_3PI4_HI (BX_CONST64(0x96cbe3f9990e91a7))
+#define FLOAT_3PI4_LO (BX_CONST64(0x9000000000000000))
+#endif
+
+#endif
--- a/bochs/fpu/fpu_emu.h
+++ b/bochs/fpu/fpu_emu.h
@ -1,185 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_emu.h                                                                |
- |  $Id: fpu_emu.h,v 1.23 2004-02-20 00:54:22 danielg4 Exp $
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@suburbia.net             |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-
-#ifndef _FPU_EMU_H_
-#define _FPU_EMU_H_
-
-#include <linux/linkage.h>
-
-/*
- * Define PECULIAR_486 to get a closer approximation to 80486 behaviour,
- * rather than behaviour which appears to be cleaner.
- * This is a matter of opinion: for all I know, the 80486 may simply
- * be complying with the IEEE spec. Maybe one day I'll get to see the
- * spec...
- */
-#define PECULIAR_486
-
-#define EXP_BIAS	(0)
-#define EXP_OVER	(0x4000)    /* smallest invalid large exponent */
-#define	EXP_UNDER	(-0x3fff)   /* largest invalid small exponent */
-#define EXP_WAY_UNDER   (-0x6000)   /* Below the smallest denormal, but still a 16 bit nr */
-
-#define EXP_Infinity    EXP_OVER
-#define EXP_NaN         EXP_OVER
-
-#define EXTENDED_Ebias (0x3fff)
-#define EXTENDED_Emin (-0x3ffe)  /* smallest valid exponent */
-
-#define SIGN_POS	(0)
-#define SIGN_NEG	(0x80)
-
-#define SIGN_Positive	(0)
-#define SIGN_Negative	(0x8000)
-
-/* Keep the order TAG_Valid, TAG_Zero, TW_Denormal */
-/* The following fold to 2 (Special) in the Tag Word */
-#define TW_Denormal      4      /* De-normal */
-#define TW_Infinity	 5 	/* + or - infinity */
-#define	TW_NaN		 6      /* Not a Number */
-#define	TW_Unsupported	 7 	/* Not supported by an 80486 */
-
-#define TAG_Valid	 0 	/* valid */
-#define TAG_Zero	 1 	/* zero */
-#define TAG_Special	 2 	/* De-normal, + or - infinity, or NaN */
-#define TAG_Empty	 3 	/* empty */
-
-/* Special st() number to identify loaded data (not on stack). */
-#define LOADED_DATA	(10101)	
-
-/* A few flags (must be >= 0x10). */
-#define REV             0x10
-#define DEST_RM         0x20
-#define LOADED          0x40
-
-#define FPU_Exception   (0x80000000)   /* Added to tag returns. */
-
-#include "fpu_system.h"
-
-#define FWAIT_OPCODE 0x9b
-#define OP_SIZE_PREFIX 0x66
-#define ADDR_SIZE_PREFIX 0x67
-
-#if defined(__MWERKS__) && defined(macintosh)
-#pragma options align=packed
-#endif
-struct address {
-  bx_address offset;
-#ifdef EMU_BIG_ENDIAN
-  u32 empty:5;
-  u32 opcode:11;
-  u32 selector:16;
-#else
-  u32 selector:16;
-  u32 opcode:11;
-  u32 empty:5;
-#endif
-} GCC_ATTRIBUTE((packed));
-
-typedef void (*FUNC)(void);
-typedef void (*FUNC_ST0)(FPU_REG *st0_ptr, u_char st0_tag);
-
-typedef struct { u_char address_size, operand_size, segment; }
-        overrides GCC_ATTRIBUTE((packed));
-/* This structure is 32 bits: */
-typedef struct { overrides override;
-		 u_char default_mode; } 
-    fpu_addr_modes GCC_ATTRIBUTE((packed));
-#if defined(__MWERKS__) && defined(macintosh)
-#pragma options align=reset
-#endif
-
-/* PROTECTED has a restricted meaning in the emulator; it is used
-   to signal that the emulator needs to do special things to ensure
-   that protection is respected in a segmented model. */
-#define PROTECTED 4
-#define SIXTEEN   1         /* We rely upon this being 1 (true) */
-#define VM86      SIXTEEN
-#define PM16      (SIXTEEN | PROTECTED)
-#define SEG32     PROTECTED
-
-#define fpu_register(x)  ( *((FPU_REG *)(FPU_register_base + sizeof(FPU_REG) * (x & 7) )))
-#define	st(x)      ( *((FPU_REG *)(FPU_register_base + sizeof(FPU_REG) * ((FPU_tos+x) & 7) )))
-
-#define	NOT_EMPTY(i)	(!FPU_empty_i(i))
-#define	NOT_EMPTY_ST0	(st0_tag ^ TAG_Empty)
-
-/* FPU_push() does not affect the tags */
-#define FPU_push()	{ FPU_tos--; }
-
-/* register accessors */
-#ifdef EMU_BIG_ENDIAN
-
-#define MAKE_REG(s,e,l,h) { 0,0,0, \
-		((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)) , h, l}
-
-#define signbyte(x) (((u_char *)(x))[6])
-#define significand(x) (((u64 *)&((x)->sigh))[0])
-
-#else
-
-#define MAKE_REG(s,e,l,h) { l, h, \
-		((EXTENDED_Ebias+(e)) | ((SIGN_##s != 0)*0x8000)), 0,0,0 }
-
-#define signbyte(x) (((u_char *)(x))[9])
-#define significand(x) (((u64 *)&((x)->sigl))[0])
-
-#endif
-
-#define getsign(a) (signbyte(a) & 0x80)
-#define setsign(a,b) { if (b) signbyte(a) |= 0x80; else signbyte(a) &= 0x7f; }
-#define copysign(a,b) { if (getsign(a)) signbyte(b) |= 0x80; \
-                        else signbyte(b) &= 0x7f; }
-#define changesign(a) { signbyte(a) ^= 0x80; }
-#define setpositive(a) { signbyte(a) &= 0x7f; }
-#define setnegative(a) { signbyte(a) |= 0x80; }
-#define signpositive(a) ( (signbyte(a) & 0x80) == 0 )
-#define signnegative(a) (signbyte(a) & 0x80)
-
-BX_C_INLINE void reg_copy(FPU_REG const *x, FPU_REG *y)
-{
-  y->exp = x->exp;
-  significand(y) = significand(x);
-}
-
-#define exponent(x)  (((x)->exp & 0x7fff) - EXTENDED_Ebias)
-#define setexponentpos(x,y) { (x)->exp = ((y) + EXTENDED_Ebias) & 0x7fff; }
-#define exponent16(x)         (x)->exp
-#define setexponent16(x,y)  { (x)->exp = (y); }
-#define addexponent(x,y)    { (x)->exp += (y); }
-#define stdexp(x)           { (x)->exp += EXTENDED_Ebias; }
-
-#define isdenormal(ptr)   (exponent(ptr) == EXP_BIAS+EXP_UNDER)
-
-/*----- Prototypes for functions written in assembler -----*/
-asmlinkage int FPU_normalize_nuo(FPU_REG *x, int bias);
-asmlinkage int FPU_u_sub(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, u16 control_w, u_char sign,
-			 s32 expa, s32 expb);
-asmlinkage int FPU_u_mul(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, u16 control_w, u_char sign,
-			 s32 expon);
-asmlinkage int FPU_u_div(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, u16 control_w, u_char sign);
-asmlinkage int FPU_u_add(FPU_REG const *arg1, FPU_REG const *arg2,
-			 FPU_REG *answ, u16 control_w, u_char sign,
-			 s32 expa, s32 expb);
-asmlinkage int wm_sqrt(FPU_REG *n, u16 control_w, u_char sign);
-asmlinkage u32 FPU_shrx(void *l, u32 x);
-asmlinkage u32 FPU_shrxs(void *v, u32 x);
-asmlinkage int FPU_round(FPU_REG *arg, u32 extent,
-			 u16 control_w, u_char sign);
-
-#ifndef MAKING_PROTO
-#include "fpu_proto.h"
-#endif
-
-#endif /* !defined _FPU_EMU_H_ */
--- a/bochs/fpu/fpu_entry.c
+++ b/bochs/fpu/fpu_entry.c
@ -1,390 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_entry.c                                                              |
- |  $Id: fpu_entry.c,v 1.18 2003-11-01 18:36:19 sshwarts Exp $
- |                                                                           |
- | The entry functions for wm-FPU-emu                                        |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1996,1997                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- | See the files "README" and "COPYING" for further copyright and warranty   |
- | information.                                                              |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | Note:                                                                     |
- |    The file contains code which accesses user memory.                     |
- |    Emulator static data may change when user memory is accessed, due to   |
- |    other processes using the emulator while swapping is in progress.      |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | math_emulate(), restore_i387_soft() and save_i387_soft() are the only     |
- | entry points for wm-FPU-emu.                                              |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "fpu_emu.h"
-#include "exception.h"
-#include "control_w.h"
-#include "status_w.h"
-
-#include <linux/signal.h>
-
-#define __BAD__ FPU_illegal   /* Illegal on an 80486, causes SIGILL */
-
-#if BX_CPU_LEVEL < 6
-
-static FUNC const st_instr_table[64] = {
-  fadd__,  fld_i_,    __BAD__, __BAD__, fadd_i,  ffree_,  faddp_,  __BAD__,
-  fmul__,  fxch_i,    __BAD__, __BAD__, fmul_i,  __BAD__, fmulp_,  __BAD__,
-  fcom_st, fp_nop,    __BAD__, __BAD__, __BAD__, fst_i_,  __BAD__, __BAD__,
-  fcompst, __BAD__,   __BAD__, __BAD__, __BAD__, fstp_i,  fcompp,  __BAD__,
-  fsub__,  FPU_etc,   __BAD__, finit_,  fsubri,  fucom_,  fsubrp,  fstsw_,
-  fsubr_,  fconst,    fucompp, __BAD__, fsub_i,  fucomp,  fsubp_,  __BAD__,
-  fdiv__,  FPU_triga, __BAD__, __BAD__, fdivri,  __BAD__, fdivrp,  __BAD__,
-  fdivr_,  FPU_trigb, __BAD__, __BAD__, fdiv_i,  __BAD__, fdivp_,  __BAD__,
-};
-
-#else
-
-static FUNC const st_instr_table[64] = {
-  fadd__,  fld_i_,    FPU_fcmovb,  FPU_fcmovnb,  fadd_i,  ffree_,  faddp_,  __BAD__,
-  fmul__,  fxch_i,    FPU_fcmove,  FPU_fcmovne,  fmul_i,  __BAD__, fmulp_,  __BAD__,
-  fcom_st, fp_nop,    FPU_fcmovbe, FPU_fcmovnbe, __BAD__, fst_i_,  __BAD__, __BAD__,
-  fcompst, __BAD__,   FPU_fcmovu,  FPU_fcmovnu,  __BAD__, fstp_i,  fcompp,  __BAD__,
-  fsub__,  FPU_etc,   __BAD__,     finit_,       fsubri,  fucom_,  fsubrp,  fstsw_,
-  fsubr_,  fconst,    fucompp,     FPU_fucomi,   fsub_i,  fucomp,  fsubp_,  FPU_fucomip,
-  fdiv__,  FPU_triga, __BAD__,     FPU_fcomi,    fdivri,  __BAD__, fdivrp,  FPU_fcomip,
-  fdivr_,  FPU_trigb, __BAD__,     __BAD__,      fdiv_i,  __BAD__, fdivp_,  __BAD__,
-};
-
-#endif
-
-#define _NONE_ 0   /* Take no special action */
-#define _REG0_ 1   /* Need to check for not empty st(0) */
-#define _REGI_ 2   /* Need to check for not empty st(0) and st(rm) */
-#define _REGi_ 0   /* Uses st(rm) */
-#define _PUSH_ 3   /* Need to check for space to push onto stack */
-#define _null_ 4   /* Function illegal or not implemented */
-#define _REGIi 5   /* Uses st(0) and st(rm), result to st(rm) */
-#define _REGIp 6   /* Uses st(0) and st(rm), result to st(rm) then pop */
-#define _REGIc 0   /* Compare st(0) and st(rm) */
-#define _REGIn 0   /* Uses st(0) and st(rm), but handle checks later */
-
-#if BX_CPU_LEVEL < 6
-
-static u_char const type_table[64] = {
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _REGi_, _REGIp, _null_,
-  _REGI_, _REGIn, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-  _REGIc, _NONE_, _null_, _null_, _null_, _REG0_, _null_, _null_,
-  _REGIc, _null_, _null_, _null_, _null_, _REG0_, _REGIc, _null_,
-  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-  _REGI_, _NONE_, _REGIc, _null_, _REGIi, _REGIc, _REGIp, _null_,
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_,
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
-};
-
-#else
-
-static u_char const type_table[64] = {
-  _REGI_, _NONE_, _REGIn, _REGIn, _REGIi, _REGi_, _REGIp, _null_,
-  _REGI_, _REGIn, _REGIn, _REGIn, _REGIi, _null_, _REGIp, _null_,
-  _REGIc, _NONE_, _REGIn, _REGIn, _null_, _REG0_, _null_, _null_,
-  _REGIc, _null_, _REGIn, _REGIn, _null_, _REG0_, _REGIc, _null_,
-  _REGI_, _NONE_, _null_, _NONE_, _REGIi, _REGIc, _REGIp, _NONE_,
-  _REGI_, _NONE_, _REGIc, _REGIc, _REGIi, _REGIc, _REGIp, _REGIc,
-  _REGI_, _NONE_, _null_, _REGIc, _REGIi, _null_, _REGIp, _REGIc,
-  _REGI_, _NONE_, _null_, _null_, _REGIi, _null_, _REGIp, _null_
-};
-
-#endif
-
-/* Note, this is a version of fpu_entry.c, modified to interface
- * to a CPU simulator, rather than a kernel.
- *
- * Ported by Kevin Lawton Sep 20, 1999
- */
-
-  asmlinkage void
-math_emulate(fpu_addr_modes addr_modes,
-              u_char FPU_modrm,
-              u_char byte1,
-              bx_address data_address,
-              struct address data_sel_off,
-              struct address entry_sel_off)
-{
-  u16 code;
-  int unmasked;
-  FPU_REG loaded_data;
-  FPU_REG *st0_ptr;
-  u_char  loaded_tag, st0_tag;
-
-  /* assuming byte is 0xd8..0xdf or 0xdb==FWAIT */
-
-  /* lock is not a valid prefix for FPU instructions, +++
-     let the cpu handle it to generate a SIGILL. */
-
-  no_ip_update = 0;
-
-  if (byte1 == FWAIT_OPCODE) {
-    if (FPU_partial_status & SW_Summary)
-      goto do_the_FPU_interrupt;
-    else
-      return;
-    }
-
-  if (FPU_partial_status & SW_Summary) {
-    /* Ignore the error for now if the current instruction is a no-wait
-       control instruction */
-    /* The 80486 manual contradicts itself on this topic,
-       but a real 80486 uses the following instructions:
-       fninit, fnstenv, fnsave, fnstsw, fnstenv, fnclex.
-       */
-    code = (FPU_modrm << 8) | byte1;
-    if (! ((((code & 0xf803) == 0xe003) ||    /* fnclex, fninit, fnstsw */
-              (((code & 0x3003) == 0x3001) &&   /* fnsave, fnstcw, fnstenv,
-                                                   fnstsw */
-               ((code & 0xc000) != 0xc000))))) {
-      /*
-       *  We need to simulate the action of the kernel to FPU
-       *  interrupts here.
-       */
-do_the_FPU_interrupt:
-
-      math_abort(NULL, SIGFPE);
-      }
-    }
-
-  entry_sel_off.opcode = (byte1 << 8) | FPU_modrm;
-
-  FPU_rm = FPU_modrm & 7;
-
-  if (FPU_modrm < 0300) {
-      /* All of these instructions use the mod/rm byte to get a data address */
-
-      if (!(byte1 & 1)) {
-          u16 status1 = FPU_partial_status;
-
-          st0_ptr = &st(0);
-          st0_tag = FPU_gettag0();
-
-          /* Stack underflow has priority */
-          if (NOT_EMPTY_ST0) {
-              unmasked = 0;  /* Do this here to stop compiler warnings. */
-              switch ((byte1 >> 1) & 3)
-                {
-                case 0:
-                  unmasked = FPU_load_single(data_address, &loaded_data);
-                  loaded_tag = unmasked & 0xff;
-                  unmasked &= ~0xff;
-                  break;
-                case 1:
-                  loaded_tag = FPU_load_int32(data_address, &loaded_data);
-                  break;
-                case 2:
-                  unmasked = FPU_load_double(data_address, &loaded_data);
-                  loaded_tag = unmasked & 0xff;
-                  unmasked &= ~0xff;
-                  break;
-                case 3:
-                default:  /* Used here to suppress gcc warnings. */
-                  loaded_tag = FPU_load_int16(data_address, &loaded_data);
-                  break;
-                }
-
-              /* No more access to user memory, it is safe
-                 to use static data now */
-
-              /* NaN operands have the next priority. */
-              /* We have to delay looking at st(0) until after
-                 loading the data, because that data might contain an SNaN */
-              if (((st0_tag == TAG_Special) && isNaN(st0_ptr)) ||
-                  ((loaded_tag == TAG_Special) && isNaN(&loaded_data)))
-                {
-                  /* Restore the status word; we might have loaded a
-                     denormal. */
-                  FPU_partial_status = status1;
-                  if ((FPU_modrm & 0x30) == 0x10)
-                    {
-                      /* fcom or fcomp */
-                      EXCEPTION(EX_Invalid);
-                      setcc(SW_C3 | SW_C2 | SW_C0);
-                      if ((FPU_modrm & 0x08) && (FPU_control_word & CW_Invalid))
-                        FPU_pop();             /* fcomp, masked, so we pop. */
-                    }
-                  else
-                    {
-                      if (loaded_tag == TAG_Special)
-                        loaded_tag = FPU_Special(&loaded_data);
-#ifdef PECULIAR_486
-                      /* This is not really needed, but gives behaviour
-                         identical to an 80486 */
-                      if ((FPU_modrm & 0x28) == 0x20)
-                        /* fdiv or fsub */
-                        real_2op_NaN(&loaded_data, loaded_tag, 0, &loaded_data);
-                      else
-#endif /* PECULIAR_486 */
-                        /* fadd, fdivr, fmul, or fsubr */
-                        real_2op_NaN(&loaded_data, loaded_tag, 0, st0_ptr);
-                    }
-                  goto reg_mem_instr_done;
-                }
-
-              if (unmasked && !((FPU_modrm & 0x30) == 0x10))
-                {
-                  /* Is not a comparison instruction. */
-                  if ((FPU_modrm & 0x38) == 0x38)
-                    {
-                      /* fdivr */
-                      if ((st0_tag == TAG_Zero) &&
-                           ((loaded_tag == TAG_Valid)
-                            || (loaded_tag == TAG_Special
-                                && isdenormal(&loaded_data))))
-                        {
-                          if (FPU_divide_by_zero(0, getsign(&loaded_data)) < 0)
-                            {
-                              /* We use the fact here that the unmasked
-                                 exception in the loaded data was for a
-                                 denormal operand */
-                              /* Restore the state of the denormal op bit */
-                              FPU_partial_status &= ~SW_Denorm_Op;
-                              FPU_partial_status |= status1 & SW_Denorm_Op;
-                            }
-                          else
-                            setsign(st0_ptr, getsign(&loaded_data));
-                        }
-                    }
-                  goto reg_mem_instr_done;
-                }
-
-              switch ((FPU_modrm >> 3) & 7)
-                {
-                case 0:         /* fadd */
-                  clear_C1();
-                  FPU_add(&loaded_data, loaded_tag, 0, FPU_control_word);
-                  break;
-                case 1:         /* fmul */
-                  clear_C1();
-                  FPU_mul(&loaded_data, loaded_tag, 0, FPU_control_word);
-                  break;
-                case 2:         /* fcom */
-                  FPU_compare_st_data(&loaded_data, loaded_tag);
-                  break;
-                case 3:         /* fcomp */
-		  /* bbd: used to typecase to int first, but this corrupted the
-		     pointer on 64 bit machines. */
-                  if (!FPU_compare_st_data(&loaded_data, loaded_tag) && !unmasked)
-                    FPU_pop();
-                  break;
-                case 4:         /* fsub */
-                  clear_C1();
-                  FPU_sub(LOADED|loaded_tag, &loaded_data, FPU_control_word);
-                  break;
-                case 5:         /* fsubr */
-                  clear_C1();
-                  FPU_sub(REV|LOADED|loaded_tag, &loaded_data, FPU_control_word);
-                  break;
-                case 6:         /* fdiv */
-                  clear_C1();
-                  FPU_div(LOADED|loaded_tag, &loaded_data, FPU_control_word);
-                  break;
-                case 7:         /* fdivr */
-                  clear_C1();
-                  if (st0_tag == TAG_Zero)
-                    FPU_partial_status = status1;  /* Undo any denorm tag,
-                                                  zero-divide has priority. */
-                  FPU_div(REV|LOADED|loaded_tag, &loaded_data, FPU_control_word);
-                  break;
-                }
-            }
-          else
-            {
-              if ((FPU_modrm & 0x30) == 0x10)
-                {
-                  /* The instruction is fcom or fcomp */
-                  EXCEPTION(EX_StackUnder);
-                  setcc(SW_C3 | SW_C2 | SW_C0);
-                  if ((FPU_modrm & 0x08) && (FPU_control_word & CW_Invalid))
-                    FPU_pop();             /* fcomp */
-                }
-              else
-                FPU_stack_underflow();
-            }
-        reg_mem_instr_done:
-          FPU_operand_address = data_sel_off;
-        }
-      else {
-          if (!(no_ip_update =
-                 FPU_load_store(((FPU_modrm & 0x38) | (byte1 & 6)) >> 1,
-                                addr_modes, data_address)))
-            {
-              FPU_operand_address = data_sel_off;
-            }
-        }
-    }
-  else {
-      /* None of these instructions access user memory */
-      u_char instr_index = (FPU_modrm & 0x38) | (byte1 & 7);
-
-#ifdef PECULIAR_486
-      /* This is supposed to be undefined, but a real 80486 seems
-         to do this: */
-      FPU_operand_address.offset = 0;
-      FPU_operand_address.selector = FPU_DS;
-#endif /* PECULIAR_486 */
-
-      st0_ptr = &st(0);
-      st0_tag = FPU_gettag0();
-      switch (type_table[(int) instr_index])
-        {
-        case _NONE_:   /* also _REGIc: _REGIn */
-          break;
-        case _REG0_:
-          if (!NOT_EMPTY_ST0)
-            {
-              FPU_stack_underflow();
-              goto FPU_instruction_done;
-            }
-          break;
-        case _REGIi:
-          if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm))
-            {
-              FPU_stack_underflow_i(FPU_rm);
-              goto FPU_instruction_done;
-            }
-          break;
-        case _REGIp:
-          if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm))
-            {
-              FPU_stack_underflow_pop(FPU_rm);
-              goto FPU_instruction_done;
-            }
-          break;
-        case _REGI_:
-          if (!NOT_EMPTY_ST0 || !NOT_EMPTY(FPU_rm))
-            {
-              FPU_stack_underflow();
-              goto FPU_instruction_done;
-            }
-          break;
-        case _PUSH_:     /* Only used by the fld st(i) instruction */
-          break;
-        case _null_:
-          FPU_illegal();
-          goto FPU_instruction_done;
-        default:
-          INTERNAL(0x111);
-          goto FPU_instruction_done;
-        }
-      (*st_instr_table[(int) instr_index])();
-
-FPU_instruction_done:
-      ;
-    }
-
-  if (! no_ip_update)
-    FPU_instruction_address = entry_sel_off;
-}
--- a/bochs/fpu/fpu_etc.c
+++ b/bochs/fpu/fpu_etc.c
@ -1,144 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_etc.c                                                                |
- |  $Id: fpu_etc.c,v 1.5 2003-10-04 12:32:56 sshwarts Exp $
- |                                                                           |
- | Implement a few FPU instructions.                                         |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@suburbia.net             |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "exception.h"
-#include "fpu_emu.h"
-#include "status_w.h"
-#include "reg_constant.h"
-
-
-static void fchs(FPU_REG *st0_ptr, u_char st0tag)
-{
-  if ( st0tag ^ TAG_Empty )
-    {
-      signbyte(st0_ptr) ^= SIGN_NEG;
-      clear_C1();
-    }
-  else
-    FPU_stack_underflow();
-}
-
-
-static void fpu_fabs(FPU_REG *st0_ptr, u_char st0tag)
-{
-  if ( st0tag ^ TAG_Empty )
-    {
-      setpositive(st0_ptr);
-      clear_C1();
-    }
-  else
-    FPU_stack_underflow();
-}
-
-
-static void ftst_(FPU_REG *st0_ptr, u_char st0tag)
-{
-  switch (st0tag)
-    {
-    case TAG_Zero:
-      setcc(SW_C3);
-      break;
-    case TAG_Valid:
-      if (getsign(st0_ptr) == SIGN_POS)
-        setcc(0);
-      else
-        setcc(SW_C0);
-      break;
-    case TAG_Special:
-      switch ( FPU_Special(st0_ptr) )
-	{
-	case TW_Denormal:
-	  if (getsign(st0_ptr) == SIGN_POS)
-	    setcc(0);
-	  else
-	    setcc(SW_C0);
-	  if ( denormal_operand() < 0 )
-	    {
-#ifdef PECULIAR_486
-	      /* This is weird! */
-	      if (getsign(st0_ptr) == SIGN_POS)
-		setcc(SW_C3);
-#endif /* PECULIAR_486 */
-	      return;
-	    }
-	  break;
-	case TW_NaN:
-	  setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */ 
-	  EXCEPTION(EX_Invalid);
-	  break;
-	case TW_Infinity:
-	  if (getsign(st0_ptr) == SIGN_POS)
-	    setcc(0);
-	  else
-	    setcc(SW_C0);
-	  break;
-	default:
-	  setcc(SW_C0|SW_C2|SW_C3);   /* Operand is not comparable */ 
-	  INTERNAL(0x14);
-	  break;
-	}
-      break;
-    case TAG_Empty:
-      setcc(SW_C0|SW_C2|SW_C3);
-      EXCEPTION(EX_StackUnder);
-      break;
-    }
-}
-
-
-static void fxam(FPU_REG *st0_ptr, u_char st0tag)
-{
-  int c = 0;
-  switch (st0tag)
-    {
-    case TAG_Empty:
-      c = SW_C3|SW_C0;
-      break;
-    case TAG_Zero:
-      c = SW_C3;
-      break;
-    case TAG_Valid:
-      c = SW_C2;
-      break;
-    case TAG_Special:
-      switch ( FPU_Special(st0_ptr) )
-	{
-	case TW_Denormal:
-	  c = SW_C2|SW_C3;  /* Denormal */
-	  break;
-	case TW_NaN:
-	  /* We also use NaN for unsupported types. */
-	  if ( (st0_ptr->sigh & 0x80000000) && (exponent(st0_ptr) == EXP_OVER) )
-	    c = SW_C0;
-	  break;
-	case TW_Infinity:
-	  c = SW_C2|SW_C0;
-	  break;
-	}
-    }
-  if ( getsign(st0_ptr) == SIGN_NEG )
-    c |= SW_C1;
-  setcc(c);
-}
-
-
-static FUNC_ST0 const fp_etc_table[] = {
-  fchs, fpu_fabs, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal,
-  ftst_, fxam, (FUNC_ST0)FPU_illegal, (FUNC_ST0)FPU_illegal
-};
-
-void FPU_etc()
-{
-  (fp_etc_table[FPU_rm])(&st(0), FPU_gettag0());
-}
--- a/bochs/fpu/fpu_load_store.cc
+++ b/bochs/fpu/fpu_load_store.cc
@ -0,0 +1,681 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+extern float_status_t FPU_pre_exception_handling(Bit16u control_word);
+
+#if BX_SUPPORT_FPU
+#include "softfloatx80.h"
+#endif
+
+void BX_CPU_C::FLD_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  if (IS_TAG_EMPTY(i->rm()))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  int sti_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+  floatx80 sti_reg = BX_READ_FPU_REG(i->rm());
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(sti_reg, sti_tag, 0);
+#else
+  BX_INFO(("FLD_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLD_SINGLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  float32 load_reg;
+  read_virtual_dword(i->seg(), RMAddr(i), &load_reg);
+
+  float_status_t status = 
+     FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  // convert to floatx80 format
+  floatx80 result = float32_to_floatx80(load_reg, status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+     return;
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FLD_SINGLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLD_DOUBLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  float64 load_reg;
+  read_virtual_qword(i->seg(), RMAddr(i), &load_reg);
+
+  float_status_t status = 
+     FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  // convert to floatx80 format
+  floatx80 result = float64_to_floatx80(load_reg, status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+     return;
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FLD_DOUBLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FLD_EXTENDED_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  floatx80 result;
+  read_virtual_tword(i->seg(), RMAddr(i), &result);
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FLD_EXTENDED_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /0 */
+void BX_CPU_C::FILD_WORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  Bit16s load_reg;
+  read_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&load_reg));
+  floatx80 result = int32_to_floatx80((Bit32s) load_reg);
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FILD_WORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DB /0 */
+void BX_CPU_C::FILD_DWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  Bit32s load_reg;
+  read_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&load_reg));
+  floatx80 result = int32_to_floatx80(load_reg);
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FILD_DWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /5 */
+void BX_CPU_C::FILD_QWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  Bit64s load_reg;
+  read_virtual_qword(i->seg(), RMAddr(i), (Bit64u*)(&load_reg));
+  floatx80 result = int64_to_floatx80(load_reg);
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FILD_QWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /4 */
+void BX_CPU_C::FBLD_PACKED_BCD(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  Bit16u hi2;
+  Bit64u lo8;
+
+  // read packed bcd from memory
+  read_virtual_qword(i->seg(), RMAddr(i),     &lo8);
+  read_virtual_word (i->seg(), RMAddr(i) + 8, &hi2);
+
+  Bit64s scale = 1; 
+  Bit64s val64 = 0;
+
+  for (int i = 0; i < 16; i++)
+  {
+    val64 += (lo8 & 0x0F) * scale;
+    lo8 >>= 4;
+    scale *= 10;
+  }
+
+  val64 += (hi2 & 0x0F) * scale;
+  val64 += ((hi2>>4) & 0x0F) * scale * 10;
+
+  floatx80 result = int64_to_floatx80(val64);
+  if (hi2 & 0x8000)	// set negative
+      floatx80_chs(result);
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FBLD_PACKED_BCD: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FST_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  int st0_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  if (st0_tag == FPU_Tag_Empty)
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(i->rm(), pop_stack);
+     return;
+  }
+
+  floatx80 st0_reg = BX_READ_FPU_REG(0);
+
+  BX_WRITE_FPU_REGISTER_AND_TAG(st0_reg, st0_tag, i->rm());
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FST(P)_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FST_SINGLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  float32 save_reg = float32_default_nan; /* The masked response */
+
+  int pop_stack = i->nnn() & 1;
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_float32(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  write_virtual_dword(i->seg(), RMAddr(i), &save_reg);
+
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FST(P)_SINGLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FST_DOUBLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  float64 save_reg = float64_default_nan; /* The masked response */
+
+  int pop_stack = i->nnn() & 1;
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_float64(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  write_virtual_qword(i->seg(), RMAddr(i), &save_reg);
+
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FST(P)_DOUBLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DB /7 */
+void BX_CPU_C::FSTP_EXTENDED_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  floatx80 save_reg = floatx80_default_nan; /* The masked response */
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     save_reg = BX_READ_FPU_REG(0);
+  }
+
+  write_virtual_tword(i->seg(), RMAddr(i), &save_reg);
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FSTP_EXTENDED_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FIST_WORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  Bit16s save_reg = int16_indefinite;
+
+  int pop_stack = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+      float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      save_reg = floatx80_to_int16(BX_READ_FPU_REG(0), status);
+
+      if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  write_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&save_reg));
+
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FIST(P)_WORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FIST_DWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  Bit32s save_reg = int32_indefinite; /* The masked response */
+
+  int pop_stack = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_int32(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+         return;
+  }
+
+  write_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&save_reg));
+
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FIST(P)_DWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FISTP_QWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  Bit64s save_reg = int64_indefinite; /* The masked response */
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_int64(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+         return;
+  }
+
+  write_virtual_qword(i->seg(), RMAddr(i), (Bit64u*)(&save_reg));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTP_QWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+void BX_CPU_C::FBSTP_PACKED_BCD(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  /* 
+   * The packed BCD integer indefinite encoding (FFFFC000000000000000H) 
+   * is stored in response to a masked floating-point invalid-operation 
+   * exception.
+   */
+  Bit16u save_reg_hi = 0xFFFF;
+  Bit64u save_reg_lo = BX_CONST64(0xC000000000000000);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     float_status_t status = 
+        FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     Bit64s save_val = floatx80_to_int64(BX_READ_FPU_REG(0), status);
+
+     int sign = (save_val < 0);
+     if (sign) 
+        save_val = -save_val;
+ 
+     if (save_val > BX_CONST64(999999999999999999))
+     {
+        float_raise(status, float_flag_invalid);
+     }
+     
+     if (! (status.float_exception_flags & float_flag_invalid))
+     {
+        save_reg_hi = (sign) ? 0x8000 : 0;
+        save_reg_lo = 0;
+
+        for (int i=0; i<16; i++)
+        {
+           save_reg_lo += ((Bit64u)(save_val % 10)) << (4*i);
+           save_val /= 10;
+        }
+
+        save_reg_hi += (save_val % 10);
+        save_val /= 10;
+        save_reg_hi += (save_val % 10) << 4;
+    }
+
+    /* check for fpu arithmetic exceptions */
+    if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  // write packed bcd to memory
+  write_virtual_qword(i->seg(), RMAddr(i),     &save_reg_lo);
+  write_virtual_word (i->seg(), RMAddr(i) + 8, &save_reg_hi);
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FBSTP_PACKED_BCD: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /1 */
+void BX_CPU_C::FISTTP16(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  Bit16s save_reg = int16_indefinite; /* The masked response */
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+      float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      save_reg = floatx80_to_int16_round_to_zero(BX_READ_FPU_REG(0), status);
+
+      if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  write_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&save_reg));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTTP16: required PNI, configure --enable-pni"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DB /1 */
+void BX_CPU_C::FISTTP32(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  Bit32s save_reg = int32_indefinite; /* The masked response */
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_int32_round_to_zero(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+         return;
+  }
+
+  write_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&save_reg));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTTP32: required PNI, configure --enable-pni"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DD /1 */
+void BX_CPU_C::FISTTP64(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  Bit64s save_reg = int64_indefinite; /* The masked response */
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     float_status_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_int64_round_to_zero(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+         return;
+  }
+
+  write_virtual_qword(i->seg(), RMAddr(i), (Bit64u*)(&save_reg));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTTP64: required PNI, configure --enable-pni"));
+  UndefinedOpcode(i);
+#endif
+}
--- a/bochs/fpu/fpu_misc.cc
+++ b/bochs/fpu/fpu_misc.cc
@ -0,0 +1,158 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+#include "softfloatx80.h"
+#endif
+
+/* D9 C8 */
+void BX_CPU_C::FXCH_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int st0_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  int sti_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  floatx80 st0_reg = BX_READ_FPU_REG(0);
+  floatx80 sti_reg = BX_READ_FPU_REG(i->rm());
+
+  clear_C1();
+
+  if (st0_tag == FPU_Tag_Empty || sti_tag == FPU_Tag_Empty)
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if(BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+	  /* Masked response */
+          if (st0_tag == FPU_Tag_Empty)
+          {
+              st0_reg = floatx80_default_nan;
+              st0_tag = FPU_Tag_Special;
+          }
+          if (sti_tag == FPU_Tag_Empty)
+          {
+              sti_reg = floatx80_default_nan;
+              sti_tag = FPU_Tag_Special;
+          }
+      }
+      else return;
+  }
+
+  BX_WRITE_FPU_REGISTER_AND_TAG(st0_reg, st0_tag, i->rm());
+  BX_WRITE_FPU_REGISTER_AND_TAG(sti_reg, sti_tag, 0);
+#else
+  BX_INFO(("FXCH_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 E0 */
+void BX_CPU_C::FCHS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int st0_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  if (st0_tag == FPU_Tag_Empty)
+  {
+      BX_CPU_THIS_PTR FPU_stack_underflow(0);
+      return;
+  }
+
+  clear_C1();
+
+  floatx80 st0_reg = BX_READ_FPU_REG(0);
+  BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_chs(st0_reg), st0_tag, 0);
+#else
+  BX_INFO(("FCHS: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 E1 */
+void BX_CPU_C::FABS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int st0_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  if (st0_tag == FPU_Tag_Empty)
+  {
+      BX_CPU_THIS_PTR FPU_stack_underflow(0);
+      return;
+  }
+
+  clear_C1();
+
+  floatx80 st0_reg = BX_READ_FPU_REG(0);
+  BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_abs(st0_reg), st0_tag, 0);
+#else
+  BX_INFO(("FABS: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F6 */
+void BX_CPU_C::FDECSTP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  BX_CPU_THIS_PTR the_i387.tos = (BX_CPU_THIS_PTR the_i387.tos-1) & 7;
+#else
+  BX_INFO(("FDECSTP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F7 */
+void BX_CPU_C::FINCSTP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  BX_CPU_THIS_PTR the_i387.tos = (BX_CPU_THIS_PTR the_i387.tos+1) & 7;
+#else
+  BX_INFO(("FINCSTP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DD C0 */
+void BX_CPU_C::FFREE_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+  BX_CPU_THIS_PTR the_i387.FPU_settagi(FPU_Tag_Empty, i->rm());
+#else
+  BX_INFO(("FFREE_STi: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/fpu_proto.h
+++ b/bochs/fpu/fpu_proto.h
@ -1,180 +0,0 @@
-/*
- * Copyright (C) 2001  MandrakeSoft S.A.
- *
- *   MandrakeSoft S.A.
- *   43, rue d'Aboukir
- *   75002 Paris - France
- *   http: //www.linux-mandrake.com/
- *   http: //www.mandrakesoft.com/
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-/*
- * bochs.h is the master header file for all C++ code.  It includes all 
- * the system header files needed by bochs, and also includes all the bochs
- * C++ header files.  Because bochs.h and the files that it includes has 
- * structure and class definitions, it cannot be called from C code.
- */
-
-#ifndef _FPU_PROTO_H
-#define _FPU_PROTO_H
-
-/* errors.c */
-extern void FPU_illegal(void);
-asmlinkage void FPU_exception(int n);
-asmlinkage void FPU_internal(int n);
-extern int real_1op_NaN(FPU_REG *a);
-extern int real_2op_NaN(FPU_REG const *b, u_char tagb, int deststnr,
-			FPU_REG const *defaultNaN);
-extern int arith_invalid(int deststnr);
-extern int FPU_divide_by_zero(int deststnr, u_char sign);
-extern int set_precision_flag(int flags);
-extern void set_precision_flag_up(void);
-extern void set_precision_flag_down(void);
-extern int denormal_operand(void);
-extern int arith_overflow(FPU_REG *dest);
-extern int arith_round_overflow(FPU_REG *dest, u8 sign);
-extern int arith_underflow(FPU_REG *dest);
-extern void FPU_stack_overflow(void);
-extern void FPU_stack_underflow(void);
-extern void FPU_stack_underflow_i(int i);
-extern void FPU_stack_underflow_pop(int i);
-/* fpu_arith.c */
-extern void fadd__(void);
-extern void fmul__(void);
-extern void fsub__(void);
-extern void fsubr_(void);
-extern void fdiv__(void);
-extern void fdivr_(void);
-extern void fadd_i(void);
-extern void fmul_i(void);
-extern void fsubri(void);
-extern void fsub_i(void);
-extern void fdivri(void);
-extern void fdiv_i(void);
-extern void faddp_(void);
-extern void fmulp_(void);
-extern void fsubrp(void);
-extern void fsubp_(void);
-extern void fdivrp(void);
-extern void fdivp_(void);
-/* fpu_aux.c */
-extern void fclex(void);
-extern void finit(void);
-extern void finit_(void);
-extern void fstsw_(void);
-extern void fp_nop(void);
-extern void fld_i_(void);
-extern void fxch_i(void);
-extern void ffree_(void);
-extern void fst_i_(void);
-extern void fstp_i(void);
-/* fpu_entry.c */
-extern void math_abort(void *info, unsigned int signal);
-/* fpu_etc.c */
-extern void FPU_etc(void);
-/* fpu_tags.c */
-extern int FPU_gettag0(void);
-extern int FPU_gettagi(int stnr) BX_CPP_AttrRegparmN(1);
-extern int FPU_gettag(int regnr) BX_CPP_AttrRegparmN(1);
-extern void FPU_settag0(int tag) BX_CPP_AttrRegparmN(1);
-extern void FPU_settagi(int stnr, int tag) BX_CPP_AttrRegparmN(2);
-extern void FPU_settag(int regnr, int tag) BX_CPP_AttrRegparmN(2);
-extern int FPU_Special(FPU_REG const *ptr) BX_CPP_AttrRegparmN(1);
-extern int isNaN(FPU_REG const *ptr) BX_CPP_AttrRegparmN(1);
-extern void FPU_pop(void);
-extern int FPU_empty_i(int stnr) BX_CPP_AttrRegparmN(1);
-extern int FPU_stackoverflow(FPU_REG **st_new_ptr);
-extern void FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr) BX_CPP_AttrRegparmN(3);
-extern void FPU_copy_to_reg1(FPU_REG const *r, u_char tag) BX_CPP_AttrRegparmN(2);
-extern void FPU_copy_to_reg0(FPU_REG const *r, u_char tag) BX_CPP_AttrRegparmN(2);
-/* fpu_trig.c */
-extern void FPU_triga(void);
-extern void FPU_trigb(void);
-/* load_store.c */
-extern int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
-			    bx_address data_address);
-/* poly_2xm1.c */
-extern int poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result);
-/* poly_atan.c */
-extern void poly_atan(FPU_REG *st0_ptr, u_char st0_tag, FPU_REG *st1_ptr,
-		      u_char st1_tag);
-/* poly_l2.c */
-extern void poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign);
-extern int poly_l2p1(u_char s0, u_char s1, FPU_REG *r0, FPU_REG *r1,
-		     FPU_REG *d);
-/* poly_sin.c */
-extern void poly_sine(FPU_REG *st0_ptr);
-extern void poly_cos(FPU_REG *st0_ptr);
-/* poly_tan.c */
-extern void poly_tan(FPU_REG *st0_ptr, int flag);
-/* reg_add_sub.c */
-extern int FPU_add(FPU_REG const *b, u_char tagb, int destrnr, u16 control_w);
-extern int FPU_sub(int flags, FPU_REG *rm, u16 control_w);
-/* reg_compare.c */
-extern int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag);
-extern void fcom_st(void);
-extern void fcompst(void);
-extern void fcompp(void);
-extern void fucom_(void);
-extern void fucomp(void);
-extern void fucompp(void);
-/* reg_constant.c */
-extern void fconst(void);
-/* reg_ld_str.c */
-extern int FPU_load_extended(bx_address s, int stnr) BX_CPP_AttrRegparmN(2);
-extern int FPU_load_double(bx_address dfloat, FPU_REG *loaded_data) BX_CPP_AttrRegparmN(2);
-extern int FPU_load_single(bx_address single, FPU_REG *loaded_data) BX_CPP_AttrRegparmN(2);
-extern int FPU_load_int64(bx_address _s) BX_CPP_AttrRegparmN(1);
-extern int FPU_load_int32(bx_address _s, FPU_REG *loaded_data) BX_CPP_AttrRegparmN(2);
-extern int FPU_load_int16(bx_address _s, FPU_REG *loaded_data) BX_CPP_AttrRegparmN(2);
-extern int FPU_load_bcd(bx_address s) BX_CPP_AttrRegparmN(1);
-extern int FPU_store_extended(FPU_REG *st0_ptr, u_char st0_tag,
-			      bx_address d) BX_CPP_AttrRegparmN(3);
-extern int FPU_store_double(FPU_REG *st0_ptr, u_char st0_tag, bx_address dfloat) BX_CPP_AttrRegparmN(3);
-extern int FPU_store_single(FPU_REG *st0_ptr, u_char st0_tag, bx_address single) BX_CPP_AttrRegparmN(3);
-extern int FPU_store_int64(FPU_REG *st0_ptr, u_char st0_tag, bx_address d) BX_CPP_AttrRegparmN(3);
-extern int FPU_store_int32(FPU_REG *st0_ptr, u_char st0_tag, bx_address d) BX_CPP_AttrRegparmN(3);
-extern int FPU_store_int16(FPU_REG *st0_ptr, u_char st0_tag, bx_address d) BX_CPP_AttrRegparmN(3);
-extern int FPU_store_bcd(FPU_REG *st0_ptr, u_char st0_tag, bx_address d) BX_CPP_AttrRegparmN(3);
-extern int FPU_round_to_int(FPU_REG *r, u_char tag) BX_CPP_AttrRegparmN(2);
-extern bx_address fldenv(fpu_addr_modes addr_modes, bx_address s) BX_CPP_AttrRegparmN(2);
-extern void frstor(fpu_addr_modes addr_modes, bx_address data_address) BX_CPP_AttrRegparmN(2);
-extern bx_address fstenv(fpu_addr_modes addr_modes, bx_address d) BX_CPP_AttrRegparmN(2);
-extern void fsave(fpu_addr_modes addr_modes, bx_address data_address) BX_CPP_AttrRegparmN(2);
-extern int FPU_tagof(FPU_REG *ptr) BX_CPP_AttrRegparmN(1);
-/* reg_mul.c */
-extern int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w);
-
-extern int FPU_div(int flags, FPU_REG *regrm, int control_w);
-/* reg_convert.c */
-extern int FPU_to_exp16(FPU_REG const *a, FPU_REG *x) BX_CPP_AttrRegparmN(2);
-
-/* fpu_compare.c */
-extern void FPU_fcmovb();
-extern void FPU_fcmove();
-extern void FPU_fcmovbe();
-extern void FPU_fcmovu();
-extern void FPU_fcmovnb();
-extern void FPU_fcmovne();
-extern void FPU_fcmovnbe();
-extern void FPU_fcmovnu();
-extern void FPU_fucomip();
-extern void FPU_fcomip();
-extern void FPU_fucomi();
-extern void FPU_fcomi();
-
-#endif /* _FPU_PROTO_H */
--- a/bochs/fpu/fpu_system.h
+++ b/bochs/fpu/fpu_system.h
@ -1,103 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_system.h                                                             |
- |  $Id: fpu_system.h,v 1.21 2004-02-20 01:30:52 danielg4 Exp $
- |                                                                           |
- | Copyright (C) 1992,1994,1997                                              |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@suburbia.net             |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#ifndef _FPU_SYSTEM_H
-#define _FPU_SYSTEM_H
-
-#include <stdio.h>
-
-/* Get data sizes from config.h generated from simulator's
- * configure script
- */
-#include "config.h"
-typedef Bit8u  u8; /* for FPU only */
-typedef Bit8s  s8;
-typedef Bit16u u16;
-typedef Bit16s s16;
-typedef Bit32u u32;
-typedef Bit32s s32;
-typedef Bit64u u64;
-typedef Bit64s s64;
-
-#ifndef __APPLE__
-typedef Bit8u u_char;
-#endif
-
-/* -----------------------------------------------------------
- * Slimmed down version used to compile against a CPU simulator
- * rather than a kernel (ported by Kevin Lawton)
- * ------------------------------------------------------------ */
-
-#include <cpu/i387.h>
-
-#define VERIFY_READ     0
-#define VERIFY_WRITE    1
-
-#ifndef WORDS_BIGENDIAN
-#error "WORDS_BIGENDIAN not defined in config.h"
-#elif WORDS_BIGENDIAN == 1
-#define EMU_BIG_ENDIAN 1
-#else
-/* Nothing needed.  Lack of defining EMU_BIG_ENDIAN means
- * little endian.
- */
-#endif
-
-extern u32 fpu_get_user(bx_address ptr, unsigned len) BX_CPP_AttrRegparmN(2);
-extern void fpu_put_user(u32 val, bx_address ptr, unsigned len) BX_CPP_AttrRegparmN(3);
-extern void fpu_verify_area(unsigned what, bx_address ptr, unsigned n) BX_CPP_AttrRegparmN(3);
-extern unsigned fpu_get_ds(void);
-extern void fpu_set_ax(u16 ax);
-extern u32 fpu_get_eflags();
-extern void fpu_set_eflags(u32 eflags);
-
-#define SIGSEGV  11
-
-extern struct i387_t *current_i387;
-
-#define i387     (*current_i387)
-
-#define no_ip_update            (*(u_char *)&(i387.no_update))
-#define FPU_rm                  (*(u_char *)&(i387.rm))
-
-#define FPU_partial_status      (i387.swd)
-#define FPU_control_word        (i387.cwd)
-#define FPU_tag_word            (i387.twd)
-#define FPU_registers           (i387.st_space)
-#define FPU_tos                 (i387.tos)
-
-#define FPU_register_base       ((u_char *) FPU_registers)
-
-#define FPU_instruction_address (*(struct address *)&i387.fip)
-#define FPU_operand_address     (*(struct address *)&i387.foo)
-
-#define FPU_verify_area(x,y,z)	fpu_verify_area(x,(bx_address)(y),z)
-#define FPU_get_user(val,ptr,len)       ((val) = fpu_get_user((ptr), (len)))
-#define FPU_put_user(val,ptr,len)   fpu_put_user((val),(ptr),(len))
-
-#define FPU_DS  (fpu_get_ds())
-
-/*
- * Change a pointer to an int, with type conversions that make it legal.
- * First make it a void pointer, then convert to an integer of the same
- * size as the pointer.  Otherwise, on machines with 64-bit pointers, 
- * compilers complain when you typecast a 64-bit pointer into a 32-bit integer.
- */
-#define PTR2INT(x)   ((bx_ptr_equiv_t)(void *)(x))
-
-/*
- * Change an int to a pointer, with type conversions that make it legal.
- * Same strategy as PTR2INT: change to bx_ptr_equiv_t which is an integer
- * type of the same size as FPU_REG*.  Then the conversion to pointer
- * is legal.
- */
-#define REGNO2PTR(x) ((FPU_REG*)((bx_ptr_equiv_t)(x)))
-
-#endif
--- a/bochs/fpu/fpu_tags.c
+++ b/bochs/fpu/fpu_tags.c
@ -1,102 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  fpu_tags.c                                                               |
- |  $Id: fpu_tags.c,v 1.7 2004-02-11 19:40:25 sshwarts Exp $
- |                                                                           |
- |  Set FPU register tags.                                                   |
- |                                                                           |
- | Copyright (C) 1997                                                        |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@jacobi.maths.monash.edu.au                |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_emu.h"
-#include "fpu_system.h"
-#include "exception.h"
-
-void FPU_pop(void)
-{
-  FPU_tag_word |= 3 << ((FPU_tos & 7)*2);
-  FPU_tos++;
-}
-
-int FPU_gettag0(void)
-{
-  return FPU_gettag(FPU_tos);
-}
-
-int BX_CPP_AttrRegparmN(1) FPU_gettagi(int stnr)
-{
-  return FPU_gettag(FPU_tos+stnr);
-}
-
-int BX_CPP_AttrRegparmN(1) FPU_gettag(int regnr)
-{
-  return (FPU_tag_word >> ((regnr & 7)*2)) & 3;
-}
-
-void BX_CPP_AttrRegparmN(1) FPU_settag0(int tag)
-{
-  FPU_settag(FPU_tos, tag);
-}
-
-void BX_CPP_AttrRegparmN(2) FPU_settagi(int stnr, int tag)
-{
-  FPU_settag(stnr+FPU_tos, tag);
-}
-
-void BX_CPP_AttrRegparmN(2) FPU_settag(int regnr, int tag)
-{
-  regnr &= 7;
-  FPU_tag_word &= ~(3 << (regnr*2));
-  FPU_tag_word |= (tag & 3) << (regnr*2);
-}
-
-int BX_CPP_AttrRegparmN(1) FPU_Special(FPU_REG const *ptr)
-{
-  int exp = exponent(ptr);
-
-  if ( exp == EXP_BIAS+EXP_UNDER )
-    return TW_Denormal;
-  else if ( exp != EXP_BIAS+EXP_OVER )
-    return TW_NaN;
-  else if ( (ptr->sigh == 0x80000000) && (ptr->sigl == 0) )
-    return TW_Infinity;
-  return TW_NaN;
-}
-
-int BX_CPP_AttrRegparmN(1) isNaN(FPU_REG const *ptr)
-{
-  return ((exponent(ptr) == EXP_BIAS+EXP_OVER)
-	   && !((ptr->sigh == 0x80000000) && (ptr->sigl == 0)));
-}
-
-int  BX_CPP_AttrRegparmN(1) FPU_empty_i(int stnr)
-{
-  return FPU_gettagi(stnr) == TAG_Empty;
-}
-
-int FPU_stackoverflow(FPU_REG **st_new_ptr)
-{
-  *st_new_ptr = &st(-1);
-  return ((FPU_tag_word >> (((FPU_tos - 1) & 7)*2)) & 3) != TAG_Empty;
-}
-
-void  BX_CPP_AttrRegparmN(3) FPU_copy_to_regi(FPU_REG const *r, u_char tag, int stnr)
-{
-  reg_copy(r, &st(stnr));
-  FPU_settagi(stnr, tag);
-}
-
-void  BX_CPP_AttrRegparmN(2) FPU_copy_to_reg1(FPU_REG const *r, u_char tag)
-{
-  reg_copy(r, &st(1));
-  FPU_settagi(1, tag);
-}
-
-void  BX_CPP_AttrRegparmN(2) FPU_copy_to_reg0(FPU_REG const *r, u_char tag)
-{
-  reg_copy(r, &st(0));
-  FPU_settagi(0, tag);
-}
--- a/bochs/fpu/fpu_tags.cc
+++ b/bochs/fpu/fpu_tags.cc
@ -0,0 +1,61 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+#include "softfloat.h"
+#include "softfloat-specialize.h"
+
+/* -----------------------------------------------------------
+ * Slimmed down version used to compile against a CPU simulator
+ * rather than a kernel (ported by Kevin Lawton)
+ * ------------------------------------------------------------ */
+
+#include <cpu/i387.h>
+
+int FPU_tagof(const floatx80 &reg)
+{
+   Bit32s exp = floatx80_exp(reg);
+   if (exp == 0)
+   {
+      if (! floatx80_fraction(reg))
+	  return FPU_Tag_Zero;
+
+      /* The number is a de-normal or pseudodenormal. */
+      return FPU_Tag_Special;
+   }
+
+   if (exp == 0x7fff)
+   {
+      /* Is an Infinity, a NaN, or an unsupported data type. */
+      return FPU_Tag_Special;
+   }
+
+   if (!(reg.fraction & BX_CONST64(0x8000000000000000)))
+   {
+      /* Unsupported data type. */
+      /* Valid numbers have the ms bit set to 1. */
+      return FPU_Tag_Special;
+   }
+
+   return FPU_Tag_Valid;
+}
--- a/bochs/fpu/fpu_trans.cc
+++ b/bochs/fpu/fpu_trans.cc
@ -0,0 +1,474 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+#if BX_SUPPORT_FPU
+#include "softfloatx80.h"
+#include "softfloat-specialize.h"
+#endif
+
+extern float_status_t FPU_pre_exception_handling(Bit16u control_word);
+
+/* D9 F0 */
+void BX_CPU_C::F2XM1(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 result = f2xm1(BX_READ_FPU_REG(0), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("F2XM1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F1 */
+void BX_CPU_C::FYL2X(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0, 1);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 result = fyl2x(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FYL2X: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F2 */
+void BX_CPU_C::FPTAN(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+  clear_C2();
+
+  if (! IS_TAG_EMPTY(-1) || IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Overflow);
+
+      /* The masked response */
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+          BX_CPU_THIS_PTR the_i387.FPU_push();
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+      }
+
+      return; 
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 y = BX_READ_FPU_REG(0);
+  if (ftan(y, status) == -1)
+  {
+      FPU_PARTIAL_STATUS |= FPU_SW_C2;
+      return;
+  }
+
+  if (floatx80_is_nan(y))
+  {
+      if (! BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      {
+          BX_WRITE_FPU_REGISTER_AND_TAG(y, FPU_Tag_Special, 0);
+          BX_CPU_THIS_PTR the_i387.FPU_push();
+          BX_WRITE_FPU_REGISTER_AND_TAG(y, FPU_Tag_Special, 0);
+      }
+
+      return;
+  }
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(y, 0);
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REGISTER_AND_TAG(Const_1, FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FPTAN: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F3 */
+void BX_CPU_C::FPATAN(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0, 1);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 result = fpatan(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FPATAN: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F4 */
+void BX_CPU_C::FXTRACT(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1) || IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Overflow);
+
+      /* The masked response */
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+          BX_CPU_THIS_PTR the_i387.FPU_push();
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+      }
+
+      return; 
+  }
+
+  float_status_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  floatx80 a = BX_READ_FPU_REG(0);
+  floatx80 b = floatx80_extract(a, status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(b, 0);	// exponent
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(a, 0);	// fraction
+#else
+  BX_INFO(("FXTRACT: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F5 */
+void BX_CPU_C::FPREM1(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  Bit64u quotient;
+
+  floatx80 a = BX_READ_FPU_REG(0);
+  floatx80 b = BX_READ_FPU_REG(1);
+
+  floatx80 result = floatx80_ieee754_remainder(a, b, quotient, status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  int cc = 0;
+  if (quotient == (Bit64u) -1) cc = FPU_SW_C2;
+  else
+  {
+      if (quotient & 1) cc |= FPU_SW_C1;
+      if (quotient & 2) cc |= FPU_SW_C3;
+      if (quotient & 4) cc |= FPU_SW_C0;
+  }
+  setcc(cc);
+
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FPREM1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F8 */
+void BX_CPU_C::FPREM(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  Bit64u quotient;
+
+  floatx80 a = BX_READ_FPU_REG(0);
+  floatx80 b = BX_READ_FPU_REG(1);
+
+  floatx80 result = floatx80_remainder(a, b, quotient, status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  int cc = 0;
+  if (quotient == (Bit64u) -1) cc = FPU_SW_C2;
+  else
+  {
+      if (quotient & 1) cc |= FPU_SW_C1;
+      if (quotient & 2) cc |= FPU_SW_C3;
+      if (quotient & 4) cc |= FPU_SW_C0;
+  }
+  setcc(cc);
+
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FPREM: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F9 */
+void BX_CPU_C::FYL2XP1(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0, 1);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 result = fyl2xp1(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FYL2XP1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 FB */
+void BX_CPU_C::FSINCOS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+  clear_C2();
+
+  if (! IS_TAG_EMPTY(-1) || IS_TAG_EMPTY(0))
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Overflow);
+
+      /* The masked response */
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+          BX_CPU_THIS_PTR the_i387.FPU_push();
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+      }
+
+      return; 
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 y = BX_READ_FPU_REG(0);
+  floatx80 sin_y, cos_y;
+  if (fsincos(y, &sin_y, &cos_y, status) == -1) 
+  {
+      FPU_PARTIAL_STATUS |= FPU_SW_C2;
+      return;
+  }
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(sin_y, 0);
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(cos_y, 0);
+#else
+  BX_INFO(("FSINCOS: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 FD */
+void BX_CPU_C::FSCALE(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  floatx80 result = floatx80_scale(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FSCALE: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 FE */
+void BX_CPU_C::FSIN(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+  clear_C2();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 y = BX_READ_FPU_REG(0);
+  if (fsin(y, status) == -1)
+  {
+      FPU_PARTIAL_STATUS |= FPU_SW_C2;
+      return;
+  }
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(y, 0);
+#else
+  BX_INFO(("FSIN: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 FF */
+void BX_CPU_C::FCOS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+  clear_C2();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  float_status_t status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word() | FPU_PR_80_BITS);
+
+  floatx80 y = BX_READ_FPU_REG(0);
+  if (fcos(y, status) == -1)
+  {
+      FPU_PARTIAL_STATUS |= FPU_SW_C2;
+      return;
+  }
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(y, 0);
+#else
+  BX_INFO(("FCOS: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/fpu_trig.c
+++ b/bochs/fpu/fpu_trig.c
--- a/bochs/fpu/fsincos.cc
+++ b/bochs/fpu/fsincos.cc
@ -0,0 +1,428 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "fpu_constant.h"
+
+static const floatx80 floatx80_one = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
+
+/* reduce trigonometric function argument using 128-bit precision 
+   M_PI approximation */
+static Bit64u argument_reduction_kernel(Bit64u aSig0, int Exp, Bit64u *zSig0, Bit64u *zSig1)
+{
+    Bit64u term0, term1, term2;
+    Bit64u aSig1 = 0;
+
+    shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
+    Bit64u q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
+    mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
+    sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+    while ((Bit64s)(*zSig1) < 0) {
+        --q;
+        add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
+    }
+    *zSig1 = term2;
+    return q;
+}
+
+static int reduce_trig_arg(int expDiff, int &zSign, Bit64u &aSig0, Bit64u &aSig1)
+{
+    Bit64u term0, term1, q = 0;
+
+    if (expDiff < 0) {
+        shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
+        expDiff = 0;
+    }
+    if (expDiff > 0) {
+        q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
+    }
+    else {
+        if (FLOAT_PI_HI <= aSig0) {
+            aSig0 -= FLOAT_PI_HI;
+            q = 1;
+        }
+    }
+
+    shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
+    if (! lt128(aSig0, aSig1, term0, term1))
+    {
+        int lt = lt128(term0, term1, aSig0, aSig1);
+        int eq = eq128(aSig0, aSig1, term0, term1);
+              
+        if ((eq && (q & 1)) || lt) {
+            zSign = !zSign;
+            ++q;
+        }
+        if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
+    }
+
+    return (int)(q & 3);
+}
+
+#define SIN_ARR_SIZE 9
+#define COS_ARR_SIZE 9
+
+static float128 sin_arr[SIN_ARR_SIZE] =
+{
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000)), /*  1 */
+    packFloat128(BX_CONST64(0xbffc555555555555), BX_CONST64(0x5555555555555555)), /*  3 */
+    packFloat128(BX_CONST64(0x3ff8111111111111), BX_CONST64(0x1111111111111111)), /*  5 */
+    packFloat128(BX_CONST64(0xbff2a01a01a01a01), BX_CONST64(0xa01a01a01a01a01a)), /*  7 */
+    packFloat128(BX_CONST64(0x3fec71de3a556c73), BX_CONST64(0x38faac1c88e50017)), /*  9 */
+    packFloat128(BX_CONST64(0xbfe5ae64567f544e), BX_CONST64(0x38fe747e4b837dc7)), /* 11 */
+    packFloat128(BX_CONST64(0x3fde6124613a86d0), BX_CONST64(0x97ca38331d23af68)), /* 13 */
+    packFloat128(BX_CONST64(0xbfd6ae7f3e733b81), BX_CONST64(0xf11d8656b0ee8cb0)), /* 15 */
+    packFloat128(BX_CONST64(0x3fce952c77030ad4), BX_CONST64(0xa6b2605197771b00))  /* 17 */
+};
+
+static float128 cos_arr[COS_ARR_SIZE] =
+{
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000)), /*  0 */
+    packFloat128(BX_CONST64(0xbffe000000000000), BX_CONST64(0x0000000000000000)), /*  2 */
+    packFloat128(BX_CONST64(0x3ffa555555555555), BX_CONST64(0x5555555555555555)), /*  4 */
+    packFloat128(BX_CONST64(0xbff56c16c16c16c1), BX_CONST64(0x6c16c16c16c16c17)), /*  6 */
+    packFloat128(BX_CONST64(0x3fefa01a01a01a01), BX_CONST64(0xa01a01a01a01a01a)), /*  8 */
+    packFloat128(BX_CONST64(0xbfe927e4fb7789f5), BX_CONST64(0xc72ef016d3ea6679)), /* 10 */
+    packFloat128(BX_CONST64(0x3fe21eed8eff8d89), BX_CONST64(0x7b544da987acfe85)), /* 12 */
+    packFloat128(BX_CONST64(0xbfda93974a8c07c9), BX_CONST64(0xd20badf145dfa3e5)), /* 14 */
+    packFloat128(BX_CONST64(0x3fd2ae7f3e733b81), BX_CONST64(0xf11d8656b0ee8cb0))  /* 16 */
+};
+
+extern float128 OddPoly (float128 x, float128 *arr, unsigned n, float_status_t &status);
+
+/* 0 <= x <= pi/4 */
+BX_CPP_INLINE float128 poly_sin(float128 x, float_status_t &status)
+{
+    //                 3     5     7     9     11     13     15
+    //                x     x     x     x     x      x      x
+    // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
+    //                3!    5!    7!    9!    11!    13!    15!
+    //
+    //                 2     4     6     8     10     12     14
+    //                x     x     x     x     x      x      x
+    //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
+    //                3!    5!    7!    9!    11!    13!    15!
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //                          2
+    //   sin(x) ~ x * [ p(x) + x * q(x) ]
+    //
+
+    return OddPoly(x, sin_arr, SIN_ARR_SIZE, status);
+}
+
+extern float128 EvenPoly(float128 x, float128 *arr, unsigned n, float_status_t &status);
+
+/* 0 <= x <= pi/4 */
+BX_CPP_INLINE float128 poly_cos(float128 x, float_status_t &status)
+{
+    //                 2     4     6     8     10     12     14
+    //                x     x     x     x     x      x      x
+    // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
+    //                2!    4!    6!    8!    10!    12!    14!
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //                      2
+    //   cos(x) ~ [ p(x) + x * q(x) ]
+    //
+
+    return EvenPoly(x, cos_arr, COS_ARR_SIZE, status);
+}
+
+BX_CPP_INLINE void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+    if (sin_a) *sin_a = a;
+    if (cos_a) *cos_a = a;
+}
+
+BX_CPP_INLINE void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+    if (sin_a) *sin_a = a;
+    if (cos_a) *cos_a = floatx80_one;
+}
+
+static floatx80 sincos_approximation(int neg, float128 r, Bit64u quotient, float_status_t &status)
+{
+    if (quotient & 0x1) {
+        r = poly_cos(r, status);
+        neg = 0;
+    } else  {
+        r = poly_sin(r, status);
+    }
+
+    floatx80 result = float128_to_floatx80(r, status);
+    if (quotient & 0x2) 
+        neg = ! neg;
+
+    if (neg) 
+        floatx80_chs(result);
+
+    return result;
+}
+
+// =================================================
+// FSINCOS               Compute sin(x) and cos(x)
+// =================================================
+
+//
+// Uses the following identities:
+// ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+
+int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status_t &status)
+{
+    Bit64u aSig0, aSig1 = 0;
+    Bit32s aExp, zExp, expDiff;
+    int aSign, zSign;
+    int q = 0;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) 
+    {
+        goto invalid;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+     
+    /* invalid argument */
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig0<<1)) {
+            sincos_invalid(sin_a, cos_a, propagateFloatx80NaN(a, status));
+            return 0;
+        }
+
+    invalid:
+        float_raise(status, float_flag_invalid);
+        sincos_invalid(sin_a, cos_a, floatx80_default_nan);
+        return 0;
+    }
+
+    if (aExp == 0) {
+        if (aSig0 == 0) {
+            sincos_tiny_argument(sin_a, cos_a, a);
+            return 0;
+        }
+
+        float_raise(status, float_flag_denormal);
+
+        /* handle pseudo denormals */
+        if (! (aSig0 & BX_CONST64(0x8000000000000000)))
+        {
+            float_raise(status, float_flag_inexact);
+            if (sin_a)
+                float_raise(status, float_flag_underflow);
+            sincos_tiny_argument(sin_a, cos_a, a);
+            return 0;
+        }
+
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+    
+    zSign = aSign;
+    zExp = EXP_BIAS;
+    expDiff = aExp - zExp;
+
+    /* argument is out-of-range */
+    if (expDiff >= 63) 
+        return -1;
+
+    float_raise(status, float_flag_inexact);
+
+    if (expDiff < -1) {    // doesn't require reduction
+        if (expDiff <= -68) {
+            a = packFloatx80(aSign, aExp, aSig0);
+            sincos_tiny_argument(sin_a, cos_a, a);
+            return 0;
+        }
+        zExp = aExp;
+    }
+    else {
+        q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
+    }
+
+    /* **************************** */
+    /* argument reduction completed */
+    /* **************************** */
+
+    /* using float128 for approximation */
+    float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
+
+    if (aSign) q = -q;
+    if (sin_a) *sin_a = sincos_approximation(zSign, r,   q, status);
+    if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1, status);
+
+    return 0;
+}
+
+int fsin(floatx80 &a, float_status_t &status)
+{
+    return fsincos(a, &a, 0, status);
+}
+
+int fcos(floatx80 &a, float_status_t &status)
+{
+    return fsincos(a, 0, &a, status);
+}
+
+// =================================================
+// FPTAN                 Compute tan(x)
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+// 2. ----------------------------------------------------------
+//
+//           sin(x)
+//  tan(x) = ------
+//           cos(x)
+//
+
+int ftan(floatx80 &a, float_status_t &status)
+{
+    Bit64u aSig0, aSig1 = 0;
+    Bit32s aExp, zExp, expDiff;
+    int aSign, zSign; 
+    int q = 0;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) 
+    {
+        goto invalid;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+     
+    /* invalid argument */
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig0<<1))
+        {
+            a = propagateFloatx80NaN(a, status);
+            return 0;
+        }
+
+    invalid:
+        float_raise(status, float_flag_invalid);
+        a = floatx80_default_nan;
+        return 0;
+    }
+
+    if (aExp == 0) {
+        if (aSig0 == 0) return 0;
+        float_raise(status, float_flag_denormal);
+        /* handle pseudo denormals */
+        if (! (aSig0 & BX_CONST64(0x8000000000000000)))
+        {
+            float_raise(status, float_flag_inexact | float_flag_underflow);
+            return 0;
+        }
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+    
+    zSign = aSign;
+    zExp = EXP_BIAS;
+    expDiff = aExp - zExp;
+
+    /* argument is out-of-range */
+    if (expDiff >= 63) 
+        return -1;
+
+    float_raise(status, float_flag_inexact);
+
+    if (expDiff < -1) {    // doesn't require reduction
+        if (expDiff <= -68) {
+            a = packFloatx80(aSign, aExp, aSig0);
+            return 0;
+        }
+        zExp = aExp;
+    }
+    else {
+        q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
+    }
+
+    /* **************************** */
+    /* argument reduction completed */
+    /* **************************** */
+
+    /* using float128 for approximation */
+    float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1, status);
+
+    float128 sin_r = poly_sin(r, status);
+    float128 cos_r = poly_cos(r, status);
+
+    if (q & 0x1) {
+        r = float128_div(cos_r, sin_r, status);
+        zSign = ! zSign;
+    } else {
+        r = float128_div(sin_r, cos_r, status);
+    }
+
+    a = float128_to_floatx80(r, status);
+    if (zSign)
+        floatx80_chs(a);
+
+    return 0;
+}
--- a/bochs/fpu/fyl2x.cc
+++ b/bochs/fpu/fyl2x.cc
@ -0,0 +1,335 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#define FLOAT128
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+
+static const floatx80 floatx80_one =
+    packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
+
+static const float128 float128_one =
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000));
+static const float128 float128_two =
+    packFloat128(BX_CONST64(0x4000000000000000), BX_CONST64(0x0000000000000000));
+static const float128 float128_ln2 = 
+    packFloat128(BX_CONST64(0x3ffe62e42fefa39e), BX_CONST64(0xf35793c7673007e6));
+
+static const float128 float128_ln2inv  = 
+    packFloat128(BX_CONST64(0x3FFF71547652b82f), BX_CONST64(0xe1777d0ffda0d23a));
+static const float128 float128_ln2inv2 = 
+    packFloat128(BX_CONST64(0x400071547652b82f), BX_CONST64(0xe1777d0ffda0d23a));
+
+#define SQRT2_HALF_SIG 	BX_CONST64(0xb504f333f9de6484)
+
+extern float128 OddPoly(float128 x, float128 *arr, unsigned n, float_status_t &status);
+
+#define L2_ARR_SIZE 9
+
+static float128 ln_arr[L2_ARR_SIZE] =
+{
+    packFloat128(BX_CONST64(0x3fff000000000000), BX_CONST64(0x0000000000000000)), /*  1 */
+    packFloat128(BX_CONST64(0x3ffd555555555555), BX_CONST64(0x5555555555555555)), /*  3 */
+    packFloat128(BX_CONST64(0x3ffc999999999999), BX_CONST64(0x999999999999999a)), /*  5 */
+    packFloat128(BX_CONST64(0x3ffc249249249249), BX_CONST64(0x2492492492492492)), /*  7 */
+    packFloat128(BX_CONST64(0x3ffbc71c71c71c71), BX_CONST64(0xc71c71c71c71c71c)), /*  9 */
+    packFloat128(BX_CONST64(0x3ffb745d1745d174), BX_CONST64(0x5d1745d1745d1746)), /* 11 */
+    packFloat128(BX_CONST64(0x3ffb3b13b13b13b1), BX_CONST64(0x3b13b13b13b13b14)), /* 13 */
+    packFloat128(BX_CONST64(0x3ffb111111111111), BX_CONST64(0x1111111111111111)), /* 15 */
+    packFloat128(BX_CONST64(0x3ffae1e1e1e1e1e1), BX_CONST64(0xe1e1e1e1e1e1e1e2))  /* 17 */
+};
+
+static float128 poly_ln(float128 x1, float_status_t &status)
+{
+/*
+    //
+    //                     3     5     7     9     11     13     15
+    //        1+u         u     u     u     u     u      u      u
+    // 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
+    //        1-u         3     5     7     9     11     13     15
+    //
+    //                     2     4     6     8     10     12     14
+    //                    u     u     u     u     u      u      u
+    //       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
+    //                    3     5     7     9     11     13     15
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(u) = >  C  * u           q(u) = >  C   * u
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //          1+u                 2
+    //   1/2 ln --- ~ u * [ p(u) + u * q(u) ]
+    //          1-u
+    //
+*/
+    return OddPoly(x1, ln_arr, L2_ARR_SIZE, status);
+}
+
+/* required sqrt(2)/2 < x < sqrt(2) */
+static float128 poly_l2(float128 x, float_status_t &status)
+{
+    /* using float128 for approximation */
+    float128 x_p1 = float128_add(x, float128_one, status);
+    float128 x_m1 = float128_sub(x, float128_one, status);
+    x = float128_div(x_m1, x_p1, status);
+    x = poly_ln(x, status);
+    x = float128_mul(x, float128_ln2inv2, status);
+    return x;
+}
+
+static float128 poly_l2p1(float128 x, float_status_t &status)
+{
+    /* using float128 for approximation */
+    float128 x_p2 = float128_add(x, float128_two, status);
+    x = float128_div(x, x_p2, status);
+    x = poly_ln(x, status);
+    x = float128_mul(x, float128_ln2inv2, status);
+    return x;
+}
+
+// =================================================
+// FYL2X                   Compute y * log (x)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                1+u             x-1
+//   ln (x) = ln -----, when u = -----
+//                1-u             x+1
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+floatx80 fyl2x(floatx80 a, floatx80 b, float_status_t &status)
+{
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) {
+invalid:
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    Bit64u bSig = extractFloatx80Frac(b);
+    Bit32s bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    int zSign = bSign ^ 1;
+     
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1)
+             || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if (aSign) goto invalid;
+        else {
+            if (bExp == 0) {
+                if (bSig == 0) goto invalid;
+                float_raise(status, float_flag_denormal);
+            }
+            return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+    }
+    if (bExp == 0x7FFF)
+    {
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSign && (Bit64u)(aExp | aSig)) goto invalid;
+        if (aSig && (aExp == 0)) 
+            float_raise(status, float_flag_denormal);
+        if (aExp < 0x3FFF) {
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) goto invalid;
+        return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) {
+            if ((bExp | bSig) == 0) goto invalid;
+            float_raise(status, float_flag_divbyzero);
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        if (aSign) goto invalid;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (aSign) goto invalid;
+    if (bExp == 0) {
+        if (bSig == 0) {
+            if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
+            return packFloatx80(bSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0x3FFF && ((Bit64u) (aSig<<1) == 0)) 
+        return packFloatx80(bSign, 0, 0);
+
+    float_raise(status, float_flag_inexact);
+
+    int ExpDiff = aExp - 0x3FFF;
+    aExp = 0;
+    if (aSig >= SQRT2_HALF_SIG) {
+        ExpDiff++;
+        aExp--;
+    }
+
+    /* ******************************** */
+    /* using float128 for approximation */
+    /* ******************************** */
+
+    float128 x = normalizeRoundAndPackFloat128(0, aExp+0x3FEF, aSig, 0, status);
+    x = poly_l2(x, status);
+    floatx80 r = float128_to_floatx80(x, status);
+    r = floatx80_add(r, int32_to_floatx80(ExpDiff), status);
+    return floatx80_mul(r, b, status);
+}
+
+// =================================================
+// FYL2XP1                 Compute y * log (x + 1)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                  1+u              x
+//   ln (x+1) = ln -----, when u = -----
+//                  1-u             x+2
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+floatx80 fyl2xp1(floatx80 a, floatx80 b, float_status_t &status)
+{
+    Bit64u aSig, bSig;
+    Bit32s aExp, bExp;
+    int aSign, bSign;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) {
+invalid:
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    bSign = extractFloatx80Sign(b);
+    int zSign = aSign ^ bSign;
+     
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1)
+             || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if (aSign) goto invalid;
+        else {
+            if (bExp == 0) {
+                if (bSig == 0) goto invalid;
+                float_raise(status, float_flag_denormal);
+            }
+            return packFloatx80(bSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+    }
+    if (bExp == 0x7FFF)
+    {
+        if ((Bit64u) (bSig<<1)) 
+            return propagateFloatx80NaN(a, b, status);
+
+        if (aExp == 0) {
+            if (aSig == 0) goto invalid;
+            float_raise(status, float_flag_denormal);
+        }
+
+        return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) {
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloatx80(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) return packFloatx80(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+
+    float_raise(status, float_flag_inexact);
+
+    if (aSign && aExp >= 0x3FFF) 
+        return a;
+
+    if (aExp >= 0x3FFC) // big argument
+    {
+        return fyl2x(floatx80_add(a, floatx80_one, status), b, status);
+    }
+
+    /* ******************************** */
+    /* using float128 for approximation */
+    /* ******************************** */
+
+    float128 x = normalizeRoundAndPackFloat128(aSign, aExp-0x10, aSig, 0, status);
+    if (aExp < EXP_BIAS-70) // handle tiny argument
+        x = float128_mul(x, float128_ln2inv, status);
+    else                    // regular polynomial approximation
+        x = poly_l2p1(x, status);
+    floatx80 r = float128_to_floatx80(x, status);
+    return floatx80_mul(r, b, status);
+}
--- a/bochs/fpu/load_store.c
+++ b/bochs/fpu/load_store.c
@ -1,231 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  load_store.c                                                             |
- |  $Id: load_store.c,v 1.13 2004-02-11 19:40:25 sshwarts Exp $
- |                                                                           |
- | This file contains most of the code to interpret the FPU instructions     |
- | which load and store from user memory.                                    |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@suburbia.net             |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | Note:                                                                     |
- |    The file contains code which accesses user memory.                     |
- |    Emulator static data may change when user memory is accessed, due to   |
- |    other processes using the emulator while swapping is in progress.      |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "exception.h"
-#include "fpu_emu.h"
-#include "status_w.h"
-#include "control_w.h"
-
-
-#define _NONE_ 0   /* st0_ptr etc not needed */
-#define _REG0_ 1   /* Will be storing st(0) */
-#define _PUSH_ 3   /* Need to check for space to push onto stack */
-#define _null_ 4   /* Function illegal or not implemented */
-
-#define pop_0()	{ FPU_settag0(TAG_Empty); FPU_tos++; }
-
-
-static u_char const type_table[32] = {
-  _PUSH_, _PUSH_, _PUSH_, _PUSH_,
-  _null_, _null_, _null_, _null_,
-  _REG0_, _REG0_, _REG0_, _REG0_,
-  _REG0_, _REG0_, _REG0_, _REG0_,
-  _NONE_, _null_, _NONE_, _PUSH_,
-  _NONE_, _PUSH_, _null_, _PUSH_,
-  _NONE_, _null_, _NONE_, _REG0_,
-  _NONE_, _REG0_, _NONE_, _REG0_
-  };
-
-int FPU_load_store(u_char type, fpu_addr_modes addr_modes,
-		     bx_address data_address)
-{
-  FPU_REG loaded_data;
-  FPU_REG *st0_ptr;
-  u_char st0_tag = TAG_Empty;  /* This is just to stop a gcc warning. */
-  u_char loaded_tag;
-
-  st0_ptr = (FPU_REG*) NULL;   /* Initialized just to stop compiler warnings. */
-
-  switch (type_table[type])
-    {
-    case _NONE_:
-      break;
-    case _REG0_:
-      st0_ptr = &st(0);       /* Some of these instructions pop after
-				 storing */
-      st0_tag = FPU_gettag0();
-      break;
-    case _PUSH_:
-      {
-	if ( FPU_gettagi(-1) != TAG_Empty )
-	  { FPU_stack_overflow(); return 0; }
-	FPU_tos--;
-	st0_ptr = &st(0);
-      }
-      break;
-    case _null_:
-      FPU_illegal();
-      return 0;
-#ifdef PARANOID
-    default:
-      INTERNAL(0x141);
-      return 0;
-#endif /* PARANOID */
-    }
-
-  switch (type)
-    {
-    case 000:       /* fld m32real */
-      clear_C1();
-      loaded_tag = FPU_load_single(data_address, &loaded_data);
-      if ( (loaded_tag == TAG_Special)
-	   && isNaN(&loaded_data)
-	   && (real_1op_NaN(&loaded_data) < 0) )
-	{
-	  FPU_tos++;
-	  break;
-	}
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 001:      /* fild m32int */
-      clear_C1();
-      loaded_tag = FPU_load_int32(data_address, &loaded_data);
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 002:      /* fld m64real */
-      clear_C1();
-      loaded_tag = FPU_load_double(data_address, &loaded_data);
-      if ((loaded_tag == TAG_Special)
-	   && isNaN(&loaded_data)
-	   && (real_1op_NaN(&loaded_data) < 0))
-	{
-	  FPU_tos++;
-	  break;
-	}
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 003:      /* fild m16int */
-      clear_C1();
-      loaded_tag = FPU_load_int16(data_address, &loaded_data);
-      FPU_copy_to_reg0(&loaded_data, loaded_tag);
-      break;
-    case 010:      /* fst m32real */
-      clear_C1();
-      FPU_store_single(st0_ptr, st0_tag, data_address);
-      break;
-    case 011:      /* fist m32int */
-      clear_C1();
-      FPU_store_int32(st0_ptr, st0_tag, data_address);
-      break;
-    case 012:     /* fst m64real */
-      clear_C1();
-      FPU_store_double(st0_ptr, st0_tag, data_address);
-      break;
-    case 013:     /* fist m16int */
-      clear_C1();
-      FPU_store_int16(st0_ptr, st0_tag, data_address);
-      break;
-    case 014:     /* fstp m32real */
-      clear_C1();
-      if ( FPU_store_single(st0_ptr, st0_tag, data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 015:     /* fistp m32int */
-      clear_C1();
-      if ( FPU_store_int32(st0_ptr, st0_tag, data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 016:     /* fstp m64real */
-      clear_C1();
-      if ( FPU_store_double(st0_ptr, st0_tag, data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 017:     /* fistp m16int */
-      clear_C1();
-      if ( FPU_store_int16(st0_ptr, st0_tag, data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 020:     /* fldenv  m14/28byte */
-      fldenv(addr_modes, data_address);
-      /* Ensure that the values just loaded are not changed by
-	 fix-up operations. */
-      return 1;
-    case 022:     /* frstor m94/108byte */
-      frstor(addr_modes, data_address);
-      /* Ensure that the values just loaded are not changed by
-	 fix-up operations. */
-      return 1;
-    case 023:     /* fbld m80dec */
-      clear_C1();
-      loaded_tag = FPU_load_bcd(data_address);
-      FPU_settag0(loaded_tag);
-      break;
-    case 024:     /* fldcw */
-      FPU_verify_area(VERIFY_READ, data_address, 2);
-      FPU_get_user(FPU_control_word, data_address, 2);
-      if ( FPU_partial_status & ~FPU_control_word & CW_Exceptions )
-	FPU_partial_status |= (SW_Summary | SW_Backward);
-      else
-	FPU_partial_status &= ~(SW_Summary | SW_Backward);
-#ifdef PECULIAR_486
-      FPU_control_word |= 0x40;  /* An 80486 appears to always set this bit */
-#endif /* PECULIAR_486 */
-      return 1;
-    case 025:      /* fld m80real */
-      clear_C1();
-      loaded_tag = FPU_load_extended(data_address, 0);
-      FPU_settag0(loaded_tag);
-      break;
-    case 027:      /* fild m64int */
-      clear_C1();
-      loaded_tag = FPU_load_int64(data_address);
-      FPU_settag0(loaded_tag);
-      break;
-    case 030:     /* fstenv  m14/28byte */
-      fstenv(addr_modes, data_address);
-      return 1;
-    case 032:      /* fsave */
-      fsave(addr_modes, data_address);
-      return 1;
-    case 033:      /* fbstp m80dec */
-      clear_C1();
-      if ( FPU_store_bcd(st0_ptr, st0_tag, data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 034:      /* fstcw m16int */
-      FPU_verify_area(VERIFY_WRITE,data_address,2);
-      FPU_put_user(FPU_control_word, data_address, 2);
-      return 1;
-    case 035:      /* fstp m80real */
-      clear_C1();
-      if ( FPU_store_extended(st0_ptr, st0_tag, data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    case 036:      /* fstsw m2byte */
-      FPU_verify_area(VERIFY_WRITE,data_address,2);
-      FPU_put_user(status_word(),data_address, 2);
-      return 1;
-    case 037:      /* fistp m64int */
-      clear_C1();
-      if ( FPU_store_int64(st0_ptr, st0_tag, data_address) )
-	pop_0();  /* pop only if the number was actually stored
-		     (see the 80486 manual p16-28) */
-      break;
-    }
-  return 0;
-}
--- a/bochs/fpu/mul_Xsig.c
+++ b/bochs/fpu/mul_Xsig.c
@ -1,96 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  mul_Xsig.S                                                               |
- |  $Id: mul_Xsig.c,v 1.2 2001-10-06 03:53:46 bdenney Exp $
- |                                                                           |
- | Multiply a 12 byte fixed point number by another fixed point number.      |
- |                                                                           |
- | Copyright (C) 1992,1994,1995                                              |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
- |                                                                           |
- |                                                                           |
- | The result is neither rounded nor normalized, and the ls bit or so may    |
- | be wrong.                                                                 |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-
-#include "fpu_emu.h"
-#include "poly.h"
-
-
-void mul32_Xsig(Xsig *x, const u32 ba)
-{
-  Xsig y;
-  u32 zl;
-  u64 b = ba, z;
-
-  z = b * x->lsw;
-  y.lsw = z >> 32;
-
-  z = b * x->midw;
-  y.midw = z >> 32;
-  zl = z;
-  y.lsw += zl;
-  if ( zl > y.lsw )
-    y.midw ++;
-
-  z = b * x->msw;
-  y.msw = z >> 32;
-  zl = z;
-  y.midw += zl;
-  if ( zl > y.midw )
-    y.msw ++;
-
-  *x = y;
-
-}
-
-
-void mul64_Xsig(Xsig *x, const u64 *b)
-{
-  Xsig yh, yl;
-
-  yh = *x;
-  yl = *x;
-  mul32_Xsig(&yh, (*b) >> 32);
-  mul32_Xsig(&yl, *b);
-
-  x->msw = yh.msw;
-  x->midw = yh.midw + yl.msw;
-  if ( yh.midw > x->midw )
-    x->msw ++;
-  x->lsw = yh.lsw + yl.midw;
-  if ( yh.lsw > x->lsw )
-    {
-      x->midw ++;
-      if ( x->midw == 0 )
-	x->msw ++;
-    }
-
-}
-
-
-void mul_Xsig_Xsig(Xsig *x, const Xsig *b)
-{
-  u32 yh;
-  u64 y, z;
-
-  y = b->lsw;
-  y *= x->msw;
-  yh = y >> 32;
-
-  z = b->msw;
-  z <<= 32;
-  z += b->midw;
-  mul64_Xsig(x, &z);
-
-  x->lsw += yh;
-  if ( yh > x->lsw )
-    {
-      x->midw ++;
-      if ( x->midw == 0 )
-	x->msw ++;
-    }
-}
-
--- a/bochs/fpu/poly.cc
+++ b/bochs/fpu/poly.cc
@ -0,0 +1,105 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#define FLOAT128
+
+#include <assert.h>
+#include "softfloat.h"
+
+//                            2         3         4               n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       2k                --        2k+1
+//   p(x) = >  C  * x           q(x) = >  C   * x        
+//          --  2k                     --  2k+1
+//                                        
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvalPoly(float128 x, float128 *arr, unsigned n, float_status_t &status)
+{
+    float128 x2 = float128_mul(x, x, status);
+    unsigned i;
+
+    assert(n > 1);
+
+    float128 r1 = arr[--n];
+    i = n;
+    while(i >= 2) {
+        r1 = float128_mul(r1, x2, status);
+        i -= 2;
+        r1 = float128_add(r1, arr[i], status);
+    }
+    if (i) r1 = float128_mul(r1, x, status);
+
+    float128 r2 = arr[--n];
+    i = n;
+    while(i >= 2) {
+        r2 = float128_mul(r2, x2, status);
+        i -= 2;
+        r2 = float128_add(r2, arr[i], status);
+    }
+    if (i) r2 = float128_mul(r2, x, status);
+
+    return float128_add(r1, r2, status);
+}
+
+//                  2         4         6         8               2n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x        
+//          --  2k                     --  2k+1
+//                                        
+//                    2
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvenPoly(float128 x, float128 *arr, unsigned n, float_status_t &status)
+{
+     return EvalPoly(float128_mul(x, x, status), arr, n, status);
+}
+
+//                        3         5         7         9               2n+1
+// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//          0         1         2         3         4               n
+//                        2         4         6         8               2n
+//      = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//               0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x        
+//          --  2k                     --  2k+1
+//                                        
+//                        2
+//   f(x) ~ x * [ p(x) + x * q(x) ]
+//
+
+float128 OddPoly(float128 x, float128 *arr, unsigned n, float_status_t &status)
+{
+     return float128_mul(x, EvenPoly(x, arr, n, status), status);
+}
--- a/bochs/fpu/poly.h
+++ b/bochs/fpu/poly.h
@ -1,222 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  poly.h                                                                   |
- |  $Id: poly.h,v 1.7 2004-02-20 01:41:59 danielg4 Exp $
- |                                                                           |
- |  Header file for the FPU-emu poly*.c source files.                        |
- |                                                                           |
- | Copyright (C) 1994,1999                                                   |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@melbpc.org.au            |
- |                                                                           |
- | Declarations and definitions for functions operating on Xsig (12-byte     |
- | extended-significand) quantities.                                         |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#ifndef _POLY_H
-#define _POLY_H
-
-/* This 12-byte structure is used to improve the accuracy of computation
-   of transcendental functions.
-   Intended to be used to get results better than 8-byte computation
-   allows. 9-byte would probably be sufficient.
-   */
-#if defined(__MWERKS__) && defined(macintosh)
-#pragma options align=packed
-#endif
-typedef struct {
-#ifdef EMU_BIG_ENDIAN
-  u32 msw;
-  u32 midw;
-  u32 lsw;
-#else
-  u32 lsw;
-  u32 midw;
-  u32 msw;
-#endif
-} Xsig GCC_ATTRIBUTE((packed));
-#if defined(__MWERKS__) && defined(macintosh)
-#pragma options align=reset
-#endif
-
-asmlinkage void mul64(u64 const *a, u64 const *b,
-		      u64 *result);
-asmlinkage void polynomial_Xsig(Xsig *, const u64 *x,
-				const u64 terms[], const int n);
-
-asmlinkage void mul32_Xsig(Xsig *, const u32 mult);
-asmlinkage void mul64_Xsig(Xsig *, const u64 *mult);
-asmlinkage void mul_Xsig_Xsig(Xsig *dest, const Xsig *mult);
-
-asmlinkage void shr_Xsig(Xsig *, const int n);
-asmlinkage int round_Xsig(Xsig *);
-asmlinkage int norm_Xsig(Xsig *);
-asmlinkage void div_Xsig(const Xsig *x1, const Xsig *x2, Xsig *dest);
-
-/* Macro to extract the most significant 32 bits from a 64bit quantity */
-#ifdef EMU_BIG_ENDIAN
-#define LL_MSW(x)     (((u32 *)&x)[0])
-#else
-#define LL_MSW(x)     (((u32 *)&x)[1])
-#endif
-
-/* Macro to initialize an Xsig struct */
-#ifdef EMU_BIG_ENDIAN
-#define MK_XSIG(a,b,c)     { a, b, c }
-#else
-#define MK_XSIG(a,b,c)     { c, b, a }
-#endif
-
-/* Macro to access the 8 ms bytes of an Xsig as a 64bit quantity */
-#ifdef EMU_BIG_ENDIAN
-#define XSIG_LL(x)         (*(u64 *)&x.msw)
-#else
-#define XSIG_LL(x)         (*(u64 *)&x.midw)
-#endif
-
-
-/*
-   Need to run gcc with optimizations on to get these to
-   actually be in-line.
-   */
-
-/* Multiply two fixed-point 32 bit numbers, producing a 32 bit result.
-   The answer is the ms word of the product.  */
-BX_C_INLINE
-u32 mul_32_32(const u32 arg1, const u32 arg2)
-{
-#ifdef NO_ASSEMBLER
-  return (((u64)arg1) * arg2) >> 32;
-#else
-/* Some versions of gcc make it difficult to stop eax from being clobbered.
-   Merely specifying that it is used doesn't work...
- */
-  int retval;
-  asm volatile ("mull %2; movl %%edx,%%eax" \
-		:"=a" (retval) \
-		:"0" (arg1), "g" (arg2) \
-		:"dx");
-  return retval;
-#endif
-}
-
-
-/* Add the 12 byte Xsig x2 to Xsig dest, with no checks for overflow. */
-BX_C_INLINE
-void add_Xsig_Xsig(Xsig *dest, const Xsig *x2)
-{
-#ifdef NO_ASSEMBLER
-  dest->lsw += x2->lsw;
-  if ( dest->lsw < x2->lsw )
-    {
-      dest->midw ++;
-      if ( dest->midw == 0 )
-	dest->msw ++;
-    }
-  dest->midw += x2->midw;
-  if ( dest->midw < x2->midw )
-    {
-      dest->msw ++;
-    }
-  dest->msw += x2->msw;
-#else
-  asm volatile ("movl %1,%%edi; movl %2,%%esi; "
-                "movl (%%esi),%%eax; addl %%eax,(%%edi); "
-                "movl 4(%%esi),%%eax; adcl %%eax,4(%%edi); "
-                "movl 8(%%esi),%%eax; adcl %%eax,8(%%edi);"
-                 :"=g" (*dest):"g" (dest), "g" (x2)
-                 :"ax","si","di");
-#endif
-}
-
-
-/* Add the 12 byte Xsig x2 to Xsig dest, adjust exp if overflow occurs. */
-BX_C_INLINE
-void add_two_Xsig(Xsig *dest, const Xsig *x2, s32 *exp)
-{
-#ifdef NO_ASSEMBLER
-  int ovfl = 0;
-
-  dest->lsw += x2->lsw;
-  if ( dest->lsw < x2->lsw )
-    {
-      dest->midw ++;
-      if ( dest->midw == 0 )
-	{
-	  dest->msw ++;
-	  if ( dest->msw == 0 )
-	    ovfl = 1;
-	}
-    }
-  dest->midw += x2->midw;
-  if ( dest->midw < x2->midw )
-    {
-      dest->msw ++;
-      if ( dest->msw == 0 )
-	ovfl = 1;
-    }
-  dest->msw += x2->msw;
-  if ( dest->msw < x2->msw )
-    ovfl = 1;
-  if ( ovfl )
-    {
-      (*exp) ++;
-      dest->lsw >>= 1;
-      if ( dest->midw & 1 )
-	dest->lsw |= 0x80000000;
-      dest->midw >>= 1;
-      if ( dest->msw & 1 )
-	dest->midw |= 0x80000000;
-      dest->msw >>= 1;
-      dest->msw |= 0x80000000;
-    }
-#else
-/* Note: the constraints in the asm statement didn't always work properly
-   with gcc 2.5.8.  Changing from using edi to using ecx got around the
-   problem, but keep fingers crossed! */
-  asm volatile ("movl %2,%%ecx; movl %3,%%esi; "
-                "movl (%%esi),%%eax; addl %%eax,(%%ecx); "
-                "movl 4(%%esi),%%eax; adcl %%eax,4(%%ecx); "
-                "movl 8(%%esi),%%eax; adcl %%eax,8(%%ecx); "
-                "jnc 0f; "
-		"rcrl 8(%%ecx); rcrl 4(%%ecx); rcrl (%%ecx); "
-                "movl %4,%%ecx; incl (%%ecx); "
-                "movl $1,%%eax; jmp 1f; "
-                "0: xorl %%eax,%%eax; "
-                " 1:"
-		:"=g" (*exp), "=g" (*dest)
-		:"g" (dest), "g" (x2), "g" (exp)
-		:"cx","si","ax");
-#endif
-}
-
-
-/* Negate the 12 byte Xsig */
-BX_C_INLINE
-void negate_Xsig(Xsig *x)
-{
-#ifdef NO_ASSEMBLER
-  x->lsw = ~x->lsw;
-  x->midw = ~x->midw;
-  x->msw = ~x->msw;
-  x->lsw ++;
-  if ( x->lsw == 0 )
-    {
-      x->midw ++;
-      if ( x->midw == 0 )
-	x->msw ++;
-    }
-#else
-/* Negate (subtract from 1.0) the 12 byte Xsig */
-/* This is faster in a loop on my 386 than using the "neg" instruction. */
-  asm volatile("movl %1,%%esi; "
-               "xorl %%ecx,%%ecx; "
-               "movl %%ecx,%%eax; subl (%%esi),%%eax; movl %%eax,(%%esi); "
-               "movl %%ecx,%%eax; sbbl 4(%%esi),%%eax; movl %%eax,4(%%esi); "
-               "movl %%ecx,%%eax; sbbl 8(%%esi),%%eax; movl %%eax,8(%%esi); "
-               :"=g" (*x):"g" (x):"si","ax","cx");
-#endif
-}
-
-
-#endif /* _POLY_H */
--- a/bochs/fpu/poly_2xm1.c
+++ b/bochs/fpu/poly_2xm1.c
@ -1,157 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  poly_2xm1.c                                                              |
- |  $Id: poly_2xm1.c,v 1.6 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Function to compute 2^x-1 by a polynomial approximation.                  |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_emu.h"
-#include "fpu_system.h"
-#include "control_w.h"
-#include "poly.h"
-
-
-#define	HIPOWER	11
-static const u64 lterms[HIPOWER] =
-{
-  BX_CONST64(0x0000000000000000),  /* This term done separately as 12 bytes */
-  BX_CONST64(0xf5fdeffc162c7543),
-  BX_CONST64(0x1c6b08d704a0bfa6),
-  BX_CONST64(0x0276556df749cc21),
-  BX_CONST64(0x002bb0ffcf14f6b8),
-  BX_CONST64(0x0002861225ef751c),
-  BX_CONST64(0x00001ffcbfcd5422),
-  BX_CONST64(0x00000162c005d5f1),
-  BX_CONST64(0x0000000da96ccb1b),
-  BX_CONST64(0x0000000078d1b897),
-  BX_CONST64(0x000000000422b029)
-};
-
-static const Xsig hiterm = MK_XSIG(0xb17217f7, 0xd1cf79ab, 0xc8a39194);
-
-/* Four slices: 0.0 : 0.25 : 0.50 : 0.75 : 1.0,
-   These numbers are 2^(1/4), 2^(1/2), and 2^(3/4)
- */
-static const Xsig shiftterm0 = MK_XSIG(0, 0, 0);
-static const Xsig shiftterm1 = MK_XSIG(0x9837f051, 0x8db8a96f, 0x46ad2318);
-static const Xsig shiftterm2 = MK_XSIG(0xb504f333, 0xf9de6484, 0x597d89b3);
-static const Xsig shiftterm3 = MK_XSIG(0xd744fcca, 0xd69d6af4, 0x39a68bb9);
-
-static const Xsig *shiftterm[] = { &shiftterm0, &shiftterm1,
-				     &shiftterm2, &shiftterm3 };
-
-
-/*--- poly_2xm1() -----------------------------------------------------------+
- | Requires st(0) which is TAG_Valid and < 1.                                |
- +---------------------------------------------------------------------------*/
-int	poly_2xm1(u_char sign, FPU_REG *arg, FPU_REG *result)
-{
-  s32       exponent, shift;
-  u64       Xll;
-  Xsig      accumulator, Denom, argSignif;
-  u_char    tag;
-
-  exponent = exponent16(arg);
-
-#ifdef PARANOID
-  if ( exponent >= 0 )    	/* Don't want a |number| >= 1.0 */
-    {
-      /* Number negative, too large, or not Valid. */
-      INTERNAL(0x127);
-      return 1;
-    }
-#endif /* PARANOID */
-
-  argSignif.lsw = 0;
-  XSIG_LL(argSignif) = Xll = significand(arg);
-
-  if ( exponent == -1 )
-    {
-      shift = (argSignif.msw & 0x40000000) ? 3 : 2;
-      /* subtract 0.5 or 0.75 */
-      exponent -= 2;
-      XSIG_LL(argSignif) <<= 2;
-      Xll <<= 2;
-    }
-  else if ( exponent == -2 )
-    {
-      shift = 1;
-      /* subtract 0.25 */
-      exponent--;
-      XSIG_LL(argSignif) <<= 1;
-      Xll <<= 1;
-    }
-  else
-    shift = 0;
-
-  if ( exponent < -2 )
-    {
-      /* Shift the argument right by the required places. */
-      if ( FPU_shrx(&Xll, -2-exponent) >= 0x80000000U )
-	Xll++;	/* round up */
-    }
-
-  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
-  polynomial_Xsig(&accumulator, &Xll, lterms, HIPOWER-1);
-  mul_Xsig_Xsig(&accumulator, &argSignif);
-  shr_Xsig(&accumulator, 3);
-
-  mul_Xsig_Xsig(&argSignif, &hiterm);   /* The leading term */
-  add_two_Xsig(&accumulator, &argSignif, &exponent);
-
-  if ( shift )
-    {
-      /* The argument is large, use the identity:
-	 f(x+a) = f(a) * (f(x) + 1) - 1;
-	 */
-      shr_Xsig(&accumulator, - exponent);
-      accumulator.msw |= 0x80000000;      /* add 1.0 */
-      mul_Xsig_Xsig(&accumulator, shiftterm[shift]);
-      accumulator.msw &= 0x3fffffff;      /* subtract 1.0 */
-      exponent = 1;
-    }
-
-  if ( sign != SIGN_POS )
-    {
-      /* The argument is negative, use the identity:
-	     f(-x) = -f(x) / (1 + f(x))
-	 */
-      Denom.lsw = accumulator.lsw;
-      XSIG_LL(Denom) = XSIG_LL(accumulator);
-      if ( exponent < 0 )
-	shr_Xsig(&Denom, - exponent);
-      else if ( exponent > 0 )
-	{
-	  /* exponent must be 1 here */
-	  XSIG_LL(Denom) <<= 1;
-	  if ( Denom.lsw & 0x80000000 )
-	    XSIG_LL(Denom) |= 1;
-	  (Denom.lsw) <<= 1;
-	}
-      Denom.msw |= 0x80000000;      /* add 1.0 */
-      div_Xsig(&accumulator, &Denom, &accumulator);
-    }
-
-  /* Convert to 64 bit signed-compatible */
-  exponent += round_Xsig(&accumulator);
-
-  result = &st(0);
-  significand(result) = XSIG_LL(accumulator);
-  setexponent16(result, exponent);
-
-  tag = FPU_round(result, 1, FULL_PRECISION, sign);
-
-  setsign(result, sign);
-  FPU_settag0(tag);
-
-  return 0;
-
-}
--- a/bochs/fpu/poly_atan.c
+++ b/bochs/fpu/poly_atan.c
@ -1,230 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  poly_atan.c                                                              |
- |  $Id: poly_atan.c,v 1.6 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Compute the arctan of a FPU_REG, using a polynomial approximation.        |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997,1999                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@melbpc.org.au                             |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_emu.h"
-#include "fpu_system.h"
-#include "status_w.h"
-#include "control_w.h"
-#include "poly.h"
-
-#define	HIPOWERon	6	/* odd poly, negative terms */
-static const u64 oddnegterms[HIPOWERon] =
-{
-  BX_CONST64(0x0000000000000000), /* Dummy (not for - 1.0) */
-  BX_CONST64(0x015328437f756467),
-  BX_CONST64(0x0005dda27b73dec6),
-  BX_CONST64(0x0000226bf2bfb91a),
-  BX_CONST64(0x000000ccc439c5f7),
-  BX_CONST64(0x0000000355438407)
-} ;
-
-#define	HIPOWERop	6	/* odd poly, positive terms */
-static const u64 oddplterms[HIPOWERop] =
-{
-/*  BX_CONST64(0xaaaaaaaaaaaaaaab),  transferred to fixedpterm[] */
-  BX_CONST64(0x0db55a71875c9ac2),
-  BX_CONST64(0x0029fce2d67880b0),
-  BX_CONST64(0x0000dfd3908b4596),
-  BX_CONST64(0x00000550fd61dab4),
-  BX_CONST64(0x0000001c9422b3f9),
-  BX_CONST64(0x000000003e3301e1)
-};
-
-static const u64 denomterm = BX_CONST64(0xebd9b842c5c53a0e);
-
-static const Xsig fixedpterm = MK_XSIG(0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa);
-
-static const Xsig pi_signif = MK_XSIG(0xc90fdaa2, 0x2168c234, 0xc4c6628b);
-
-
-/*--- poly_atan() -----------------------------------------------------------+
- |                                                                           |
- +---------------------------------------------------------------------------*/
-void	poly_atan(FPU_REG *st0_ptr, u_char st0_tag,
-		  FPU_REG *st1_ptr, u_char st1_tag)
-{
-  u_char	transformed, inverted,
-                sign1, sign2;
-  s32           exponent;
-  s32   	dummy_exp;
-  Xsig          accumulator, Numer, Denom, accumulatore, argSignif,
-                argSq, argSqSq;
-  u_char        tag;
-  
-  sign1 = getsign(st0_ptr);
-  sign2 = getsign(st1_ptr);
-  if ( st0_tag == TAG_Valid )
-    {
-      exponent = exponent(st0_ptr);
-    }
-  else
-    {
-      /* This gives non-compatible stack contents... */
-      FPU_to_exp16(st0_ptr, st0_ptr);
-      exponent = exponent16(st0_ptr);
-    }
-  if ( st1_tag == TAG_Valid )
-    {
-      exponent -= exponent(st1_ptr);
-    }
-  else
-    {
-      /* This gives non-compatible stack contents... */
-      FPU_to_exp16(st1_ptr, st1_ptr);
-      exponent -= exponent16(st1_ptr);
-    }
-
-  if ( (exponent < 0) || ((exponent == 0) &&
-			  ((st0_ptr->sigh < st1_ptr->sigh) ||
-			   ((st0_ptr->sigh == st1_ptr->sigh) &&
-			    (st0_ptr->sigl < st1_ptr->sigl))) ) )
-    {
-      inverted = 1;
-      Numer.lsw = Denom.lsw = 0;
-      XSIG_LL(Numer) = significand(st0_ptr);
-      XSIG_LL(Denom) = significand(st1_ptr);
-    }
-  else
-    {
-      inverted = 0;
-      exponent = -exponent;
-      Numer.lsw = Denom.lsw = 0;
-      XSIG_LL(Numer) = significand(st1_ptr);
-      XSIG_LL(Denom) = significand(st0_ptr);
-     }
-  div_Xsig(&Numer, &Denom, &argSignif);
-  exponent += norm_Xsig(&argSignif);
-
-  if ( (exponent >= -1)
-      || ((exponent == -2) && (argSignif.msw > 0xd413ccd0)) )
-    {
-      /* The argument is greater than sqrt(2)-1 (=0.414213562...) */
-      /* Convert the argument by an identity for atan */
-      transformed = 1;
-
-      if ( exponent >= 0 )
-	{
-#ifdef PARANOID
-	  if ( !( (exponent == 0) && 
-		 (argSignif.lsw == 0) && (argSignif.midw == 0) &&
-		 (argSignif.msw == 0x80000000) ) )
-	    {
-	      INTERNAL(0x104);  /* There must be a logic error */
-	      return;
-	    }
-#endif /* PARANOID */
-	  argSignif.msw = 0;   /* Make the transformed arg -> 0.0 */
-	}
-      else
-	{
-	  Numer.lsw = Denom.lsw = argSignif.lsw;
-	  XSIG_LL(Numer) = XSIG_LL(Denom) = XSIG_LL(argSignif);
-
-	  if ( exponent < -1 )
-	    shr_Xsig(&Numer, -1-exponent);
-	  negate_Xsig(&Numer);
-      
-	  shr_Xsig(&Denom, -exponent);
-	  Denom.msw |= 0x80000000;
-      
-	  div_Xsig(&Numer, &Denom, &argSignif);
-
-	  exponent = -1 + norm_Xsig(&argSignif);
-	}
-    }
-  else
-    {
-      transformed = 0;
-    }
-
-  argSq.lsw = argSignif.lsw; argSq.midw = argSignif.midw;
-  argSq.msw = argSignif.msw;
-  mul_Xsig_Xsig(&argSq, &argSq);
-  
-  argSqSq.lsw = argSq.lsw; argSqSq.midw = argSq.midw; argSqSq.msw = argSq.msw;
-  mul_Xsig_Xsig(&argSqSq, &argSqSq);
-
-  accumulatore.lsw = argSq.lsw;
-  XSIG_LL(accumulatore) = XSIG_LL(argSq);
-
-  shr_Xsig(&argSq, 2*(-1-exponent-1));
-  shr_Xsig(&argSqSq, 4*(-1-exponent-1));
-
-  /* Now have argSq etc with binary point at the left
-     .1xxxxxxxx */
-
-  /* Do the basic fixed point polynomial evaluation */
-  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
-  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq),
-		   oddplterms, HIPOWERop-1);
-  mul64_Xsig(&accumulator, &XSIG_LL(argSq));
-  negate_Xsig(&accumulator);
-  polynomial_Xsig(&accumulator, &XSIG_LL(argSqSq), oddnegterms, HIPOWERon-1);
-  negate_Xsig(&accumulator);
-  add_two_Xsig(&accumulator, &fixedpterm, &dummy_exp);
-
-  mul64_Xsig(&accumulatore, &denomterm);
-  shr_Xsig(&accumulatore, 1 + 2*(-1-exponent));
-  accumulatore.msw |= 0x80000000;
-
-  div_Xsig(&accumulator, &accumulatore, &accumulator);
-
-  mul_Xsig_Xsig(&accumulator, &argSignif);
-  mul_Xsig_Xsig(&accumulator, &argSq);
-
-  shr_Xsig(&accumulator, 3);
-  negate_Xsig(&accumulator);
-  add_Xsig_Xsig(&accumulator, &argSignif);
-
-  if ( transformed )
-    {
-      /* compute pi/4 - accumulator */
-      shr_Xsig(&accumulator, -1-exponent);
-      negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &pi_signif);
-      exponent = -1;
-    }
-
-  if ( inverted )
-    {
-      /* compute pi/2 - accumulator */
-      shr_Xsig(&accumulator, -exponent);
-      negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &pi_signif);
-      exponent = 0;
-    }
-
-  if ( sign1 )
-    {
-      /* compute pi - accumulator */
-      shr_Xsig(&accumulator, 1 - exponent);
-      negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &pi_signif);
-      exponent = 1;
-    }
-
-  exponent += round_Xsig(&accumulator);
-
-  significand(st1_ptr) = XSIG_LL(accumulator);
-  setexponent16(st1_ptr, exponent);
-
-  tag = FPU_round(st1_ptr, 1, FULL_PRECISION, sign2);
-  FPU_settagi(1, tag);
-
-
-  set_precision_flag_up();  /* We do not really know if up or down,
-			       use this as the default. */
-
-}
--- a/bochs/fpu/poly_l2.c
+++ b/bochs/fpu/poly_l2.c
@ -1,273 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  poly_l2.c                                                                |
- |  $Id: poly_l2.c,v 1.5 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Compute the base 2 log of a FPU_REG, using a polynomial approximation.    |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_emu.h"
-#include "fpu_system.h"
-#include "control_w.h"
-#include "poly.h"
-
-
-static void log2_kernel(FPU_REG const *arg, u_char argsign,
-			Xsig *accum_result, s32 *expon);
-
-
-/*--- poly_l2() -------------------------------------------------------------+
- |   Base 2 logarithm by a polynomial approximation.                         |
- +---------------------------------------------------------------------------*/
-void	poly_l2(FPU_REG *st0_ptr, FPU_REG *st1_ptr, u_char st1_sign)
-{
-  s32	       exponent, expon, expon_expon;
-  Xsig         accumulator, expon_accum, yaccum;
-  u_char       sign, argsign;
-  FPU_REG      x;
-  int          tag;
-
-  exponent = exponent16(st0_ptr);
-
-  /* From st0_ptr, make a number > sqrt(2)/2 and < sqrt(2) */
-  if ( st0_ptr->sigh > (unsigned)0xb504f334 )
-    {
-      /* Treat as  sqrt(2)/2 < st0_ptr < 1 */
-      significand(&x) = - significand(st0_ptr);
-      setexponent16(&x, -1);
-      exponent++;
-      argsign = SIGN_NEG;
-    }
-  else
-    {
-      /* Treat as  1 <= st0_ptr < sqrt(2) */
-      x.sigh = st0_ptr->sigh - 0x80000000;
-      x.sigl = st0_ptr->sigl;
-      setexponent16(&x, 0);
-      argsign = SIGN_POS;
-    }
-  tag = FPU_normalize_nuo(&x, 0);
-
-  if ( tag == TAG_Zero )
-    {
-      expon = 0;
-      accumulator.msw = accumulator.midw = accumulator.lsw = 0;
-    }
-  else
-    {
-      log2_kernel(&x, argsign, &accumulator, &expon);
-    }
-
-  if ( exponent < 0 )
-    {
-      sign = SIGN_NEG;
-      exponent = -exponent;
-    }
-  else
-    sign = SIGN_POS;
-  expon_accum.msw = exponent; expon_accum.midw = expon_accum.lsw = 0;
-  if ( exponent )
-    {
-      expon_expon = 31 + norm_Xsig(&expon_accum);
-      shr_Xsig(&accumulator, expon_expon - expon);
-
-      if ( sign ^ argsign )
-	negate_Xsig(&accumulator);
-      add_Xsig_Xsig(&accumulator, &expon_accum);
-    }
-  else
-    {
-      expon_expon = expon;
-      sign = argsign;
-    }
-
-  yaccum.lsw = 0; XSIG_LL(yaccum) = significand(st1_ptr);
-  mul_Xsig_Xsig(&accumulator, &yaccum);
-
-  expon_expon += round_Xsig(&accumulator);
-
-  if ( accumulator.msw == 0 )
-    {
-      FPU_copy_to_reg1(&CONST_Z, TAG_Zero);
-      return;
-    }
-
-  significand(st1_ptr) = XSIG_LL(accumulator);
-  setexponent16(st1_ptr, expon_expon + exponent16(st1_ptr) + 1);
-
-  tag = FPU_round(st1_ptr, 1, FULL_PRECISION, sign ^ st1_sign);
-  FPU_settagi(1, tag);
-
-  set_precision_flag_up();  /* 80486 appears to always do this */
-
-  return;
-
-}
-
-
-/*--- poly_l2p1() -----------------------------------------------------------+
- |   Base 2 logarithm by a polynomial approximation.                         |
- |   log2(x+1)                                                               |
- +---------------------------------------------------------------------------*/
-int	poly_l2p1(u_char sign0, u_char sign1,
-		  FPU_REG *st0_ptr, FPU_REG *st1_ptr, FPU_REG *dest)
-{
-  u_char       	tag;
-  s32        	exponent;
-  Xsig         	accumulator, yaccum;
-
-  if ( exponent16(st0_ptr) < 0 )
-    {
-      log2_kernel(st0_ptr, sign0, &accumulator, &exponent);
-
-      yaccum.lsw = 0;
-      XSIG_LL(yaccum) = significand(st1_ptr);
-      mul_Xsig_Xsig(&accumulator, &yaccum);
-
-      exponent += round_Xsig(&accumulator);
-
-      exponent += exponent16(st1_ptr) + 1;
-      if ( exponent < EXP_WAY_UNDER ) exponent = EXP_WAY_UNDER;
-
-      significand(dest) = XSIG_LL(accumulator);
-      setexponent16(dest, exponent);
-
-      tag = FPU_round(dest, 1, FULL_PRECISION, sign0 ^ sign1);
-      FPU_settagi(1, tag);
-
-      if ( tag == TAG_Valid )
-	set_precision_flag_up();   /* 80486 appears to always do this */
-    }
-  else
-    {
-      /* The magnitude of st0_ptr is far too large. */
-
-      if ( sign0 != SIGN_POS )
-	{
-	  /* Trying to get the log of a negative number. */
-#ifdef PECULIAR_486   /* Stupid 80486 doesn't worry about log(negative). */
-	  changesign(st1_ptr);
-#else
-	  if ( arith_invalid(1) < 0 )
-	    return 1;
-#endif /* PECULIAR_486 */
-	}
-
-      /* 80486 appears to do this */
-      if ( sign0 == SIGN_NEG )
-	set_precision_flag_down();
-      else
-	set_precision_flag_up();
-    }
-
-  if ( exponent(dest) <= EXP_UNDER )
-    EXCEPTION(EX_Underflow);
-
-  return 0;
-
-}
-
-
-
-
-#undef HIPOWER
-#define	HIPOWER	10
-static const u64 logterms[HIPOWER] =
-{
-  BX_CONST64(0x2a8eca5705fc2ef0),
-  BX_CONST64(0xf6384ee1d01febce),
-  BX_CONST64(0x093bb62877cdf642),
-  BX_CONST64(0x006985d8a9ec439b),
-  BX_CONST64(0x0005212c4f55a9c8),
-  BX_CONST64(0x00004326a16927f0),
-  BX_CONST64(0x0000038d1d80a0e7),
-  BX_CONST64(0x0000003141cc80c6),
-  BX_CONST64(0x00000002b1668c9f),
-  BX_CONST64(0x000000002c7a46aa)
-};
-
-static const u32 leadterm = 0xb8000000;
-
-
-/*--- log2_kernel() ---------------------------------------------------------+
- |   Base 2 logarithm by a polynomial approximation.                         |
- |   log2(x+1)                                                               |
- +---------------------------------------------------------------------------*/
-static void log2_kernel(FPU_REG const *arg, u_char argsign, Xsig *accum_result,
-			s32 *expon)
-{
-  s32    exponent, adj;
-  u64    Xsq;
-  Xsig   accumulator, Numer, Denom, argSignif, arg_signif;
-
-  exponent = exponent16(arg);
-  Numer.lsw = Denom.lsw = 0;
-  XSIG_LL(Numer) = XSIG_LL(Denom) = significand(arg);
-  if ( argsign == SIGN_POS )
-    {
-      shr_Xsig(&Denom, 2 - (1 + exponent));
-      Denom.msw |= 0x80000000;
-      div_Xsig(&Numer, &Denom, &argSignif);
-    }
-  else
-    {
-      shr_Xsig(&Denom, 1 - (1 + exponent));
-      negate_Xsig(&Denom);
-      if ( Denom.msw & 0x80000000 )
-	{
-	  div_Xsig(&Numer, &Denom, &argSignif);
-	  exponent ++;
-	}
-      else
-	{
-	  /* Denom must be 1.0 */
-	  argSignif.lsw = Numer.lsw; argSignif.midw = Numer.midw;
-	  argSignif.msw = Numer.msw;
-	}
-    }
-
-#ifndef PECULIAR_486
-  /* Should check here that  |local_arg|  is within the valid range */
-  if ( exponent >= -2 )
-    {
-      if ( (exponent > -2) ||
-	  (argSignif.msw > (unsigned)0xafb0ccc0) )
-	{
-	  /* The argument is too large */
-	}
-    }
-#endif /* PECULIAR_486 */
-
-  arg_signif.lsw = argSignif.lsw; XSIG_LL(arg_signif) = XSIG_LL(argSignif);
-  adj = norm_Xsig(&argSignif);
-  accumulator.lsw = argSignif.lsw; XSIG_LL(accumulator) = XSIG_LL(argSignif);
-  mul_Xsig_Xsig(&accumulator, &accumulator);
-  shr_Xsig(&accumulator, 2*(-1 - (1 + exponent + adj)));
-  Xsq = XSIG_LL(accumulator);
-  if ( accumulator.lsw & 0x80000000 )
-    Xsq++;
-
-  accumulator.msw = accumulator.midw = accumulator.lsw = 0;
-  /* Do the basic fixed point polynomial evaluation */
-  polynomial_Xsig(&accumulator, &Xsq, logterms, HIPOWER-1);
-
-  mul_Xsig_Xsig(&accumulator, &argSignif);
-  shr_Xsig(&accumulator, 6 - adj);
-
-  mul32_Xsig(&arg_signif, leadterm);
-  add_two_Xsig(&accumulator, &arg_signif, &exponent);
-
-  *expon = exponent + 1;
-  accum_result->lsw = accumulator.lsw;
-  accum_result->midw = accumulator.midw;
-  accum_result->msw = accumulator.msw;
-
-}
--- a/bochs/fpu/poly_sin.c
+++ b/bochs/fpu/poly_sin.c
@ -1,398 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  poly_sin.c                                                               |
- |  $Id: poly_sin.c,v 1.5 2003-10-04 12:32:56 sshwarts Exp $
- |                                                                           |
- |  Computation of an approximation of the sin function and the cosine       |
- |  function by a polynomial.                                                |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997,1999                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@melbpc.org.au                             |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_emu.h"
-#include "fpu_system.h"
-#include "control_w.h"
-#include "poly.h"
-
-
-#define	N_COEFF_P	4
-#define	N_COEFF_N	4
-
-static const u64 pos_terms_l[N_COEFF_P] =
-{
-  BX_CONST64(0xaaaaaaaaaaaaaaab),
-  BX_CONST64(0x00d00d00d00cf906),
-  BX_CONST64(0x000006b99159a8bb),
-  BX_CONST64(0x000000000d7392e6)
-};
-
-static const u64 neg_terms_l[N_COEFF_N] =
-{
-  BX_CONST64(0x2222222222222167),
-  BX_CONST64(0x0002e3bc74aab624),
-  BX_CONST64(0x0000000b09229062),
-  BX_CONST64(0x00000000000c7973)
-};
-
-
-
-#define	N_COEFF_PH	4
-#define	N_COEFF_NH	4
-static const u64 pos_terms_h[N_COEFF_PH] =
-{
-  BX_CONST64(0x0000000000000000),
-  BX_CONST64(0x05b05b05b05b0406),
-  BX_CONST64(0x000049f93edd91a9),
-  BX_CONST64(0x00000000c9c9ed62)
-};
-
-static const u64 neg_terms_h[N_COEFF_NH] =
-{
-  BX_CONST64(0xaaaaaaaaaaaaaa98),
-  BX_CONST64(0x001a01a01a019064),
-  BX_CONST64(0x0000008f76c68a77),
-  BX_CONST64(0x0000000000d58f5e)
-};
-
-
-/*--- poly_sine() -----------------------------------------------------------+
- |                                                                           |
- +---------------------------------------------------------------------------*/
-void	poly_sine(FPU_REG *st0_ptr)
-{
-  int       exponent, echange;
-  Xsig      accumulator, argSqrd, argTo4;
-  s32       fix_up, adj;
-  u64       fixed_arg;
-  FPU_REG   result;
-
-  exponent = exponent(st0_ptr);
-
-  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
-
-  /* Split into two ranges, for arguments below and above 1.0 */
-  /* The boundary between upper and lower is approx 0.88309101259 */
-  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) )
-    {
-      /* The argument is <= 0.88309101259 */
-
-      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &significand(st0_ptr));
-      shr_Xsig(&argSqrd, 2*(-1-exponent));
-      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
-      argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
-		      N_COEFF_N-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
-		      N_COEFF_P-1);
-
-      shr_Xsig(&accumulator, 2);    /* Divide by four */
-      accumulator.msw |= 0x80000000;  /* Add 1.0 */
-
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-
-      /* Divide by four, FPU_REG compatible, etc */
-      exponent = 3*exponent;
-
-      /* The minimum exponent difference is 3 */
-      shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
-
-      negate_Xsig(&accumulator);
-      XSIG_LL(accumulator) += significand(st0_ptr);
-
-      echange = round_Xsig(&accumulator);
-
-      setexponentpos(&result, exponent(st0_ptr) + echange);
-    }
-  else
-    {
-      /* The argument is > 0.88309101259 */
-      /* We use sin(st(0)) = cos(pi/2-st(0)) */
-
-      fixed_arg = significand(st0_ptr);
-
-      if ( exponent == 0 )
-	{
-	  /* The argument is >= 1.0 */
-
-	  /* Put the binary point at the left. */
-	  fixed_arg <<= 1;
-	}
-      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
-      fixed_arg = BX_CONST64(0x921fb54442d18469) - fixed_arg;
-      /* There is a special case which arises due to rounding, to fix here. */
-      if ( fixed_arg == BX_CONST64(0xffffffffffffffff))
-	fixed_arg = 0;
-
-      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &fixed_arg);
-
-      XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
-		      N_COEFF_NH-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
-		      N_COEFF_PH-1);
-      negate_Xsig(&accumulator);
-
-      mul64_Xsig(&accumulator, &fixed_arg);
-      mul64_Xsig(&accumulator, &fixed_arg);
-
-      shr_Xsig(&accumulator, 3);
-      negate_Xsig(&accumulator);
-
-      add_Xsig_Xsig(&accumulator, &argSqrd);
-
-      shr_Xsig(&accumulator, 1);
-
-      accumulator.lsw |= 1;  /* A zero accumulator here would cause problems */
-      negate_Xsig(&accumulator);
-
-      /* The basic computation is complete. Now fix the answer to
-	 compensate for the error due to the approximation used for
-	 pi/2
-	 */
-
-      /* This has an exponent of -65 */
-      fix_up = 0x898cc517;
-      /* The fix-up needs to be improved for larger args */
-      if ( argSqrd.msw & 0xffc00000 )
-	{
-	  /* Get about 32 bit precision in these: */
-	  fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
-	}
-      fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
-
-      adj = accumulator.lsw;    /* temp save */
-      accumulator.lsw -= fix_up;
-      if ( accumulator.lsw > adj )
-	XSIG_LL(accumulator) --;
-
-      echange = round_Xsig(&accumulator);
-
-      setexponentpos(&result, echange - 1);
-    }
-
-  significand(&result) = XSIG_LL(accumulator);
-  setsign(&result, getsign(st0_ptr));
-  FPU_copy_to_reg0(&result, TAG_Valid);
-
-#ifdef PARANOID
-  if ( (exponent(&result) >= 0)
-      && (significand(&result) > BX_CONST64(0x8000000000000000)) )
-    {
-      INTERNAL(0x150);
-    }
-#endif /* PARANOID */
-
-}
-
-
-
-/*--- poly_cos() ------------------------------------------------------------+
- |                                                                           |
- +---------------------------------------------------------------------------*/
-void	poly_cos(FPU_REG *st0_ptr)
-{
-  FPU_REG    result;
-  s32        exponent, exp2, echange;
-  Xsig       accumulator, argSqrd, fix_up, argTo4;
-  u64        fixed_arg;
-
-#ifdef PARANOID
-  if ( (exponent(st0_ptr) > 0)
-      || ((exponent(st0_ptr) == 0)
-	  && (significand(st0_ptr) > BX_CONST64(0xc90fdaa22168c234))) )
-    {
-      EXCEPTION(EX_Invalid);
-      FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
-      return;
-    }
-#endif /* PARANOID */
-
-  exponent = exponent(st0_ptr);
-
-  accumulator.lsw = accumulator.midw = accumulator.msw = 0;
-
-  if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) )
-    {
-      /* arg is < 0.687705 */
-
-      argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl;
-      argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &significand(st0_ptr));
-
-      if ( exponent < -1 )
-	{
-	  /* shift the argument right by the required places */
-	  shr_Xsig(&argSqrd, 2*(-1-exponent));
-	}
-
-      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
-      argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
-		      N_COEFF_NH-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
-		      N_COEFF_PH-1);
-      negate_Xsig(&accumulator);
-
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      mul64_Xsig(&accumulator, &significand(st0_ptr));
-      shr_Xsig(&accumulator, -2*(1+exponent));
-
-      shr_Xsig(&accumulator, 3);
-      negate_Xsig(&accumulator);
-
-      add_Xsig_Xsig(&accumulator, &argSqrd);
-
-      shr_Xsig(&accumulator, 1);
-
-      /* It doesn't matter if accumulator is all zero here, the
-	 following code will work ok */
-      negate_Xsig(&accumulator);
-
-      if ( accumulator.lsw & 0x80000000 )
-	XSIG_LL(accumulator) ++;
-      if ( accumulator.msw == 0 )
-	{
-	  /* The result is 1.0 */
-	  FPU_copy_to_reg0(&CONST_1, TAG_Valid);
-	  return;
-	}
-      else
-	{
-	  significand(&result) = XSIG_LL(accumulator);
-      
-	  /* will be a valid positive nr with expon = -1 */
-	  setexponentpos(&result, -1);
-	}
-    }
-  else
-    {
-      fixed_arg = significand(st0_ptr);
-
-      if ( exponent == 0 )
-	{
-	  /* The argument is >= 1.0 */
-
-	  /* Put the binary point at the left. */
-	  fixed_arg <<= 1;
-	}
-      /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
-      fixed_arg = BX_CONST64(0x921fb54442d18469) - fixed_arg;
-      /* There is a special case which arises due to rounding, to fix here. */
-      if ( fixed_arg == BX_CONST64(0xffffffffffffffff))
-	fixed_arg = 0;
-
-      exponent = -1;
-      exp2 = -1;
-
-      /* A shift is needed here only for a narrow range of arguments,
-	 i.e. for fixed_arg approx 2^-32, but we pick up more... */
-      if ( !(LL_MSW(fixed_arg) & 0xffff0000) )
-	{
-	  fixed_arg <<= 16;
-	  exponent -= 16;
-	  exp2 -= 16;
-	}
-
-      XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0;
-      mul64_Xsig(&argSqrd, &fixed_arg);
-
-      if ( exponent < -1 )
-	{
-	  /* shift the argument right by the required places */
-	  shr_Xsig(&argSqrd, 2*(-1-exponent));
-	}
-
-      argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw;
-      argTo4.lsw = argSqrd.lsw;
-      mul_Xsig_Xsig(&argTo4, &argTo4);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
-		      N_COEFF_N-1);
-      mul_Xsig_Xsig(&accumulator, &argSqrd);
-      negate_Xsig(&accumulator);
-
-      polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
-		      N_COEFF_P-1);
-
-      shr_Xsig(&accumulator, 2);    /* Divide by four */
-      accumulator.msw |= 0x80000000;  /* Add 1.0 */
-
-      mul64_Xsig(&accumulator, &fixed_arg);
-      mul64_Xsig(&accumulator, &fixed_arg);
-      mul64_Xsig(&accumulator, &fixed_arg);
-
-      /* Divide by four, FPU_REG compatible, etc */
-      exponent = 3*exponent;
-
-      /* The minimum exponent difference is 3 */
-      shr_Xsig(&accumulator, exp2 - exponent);
-
-      negate_Xsig(&accumulator);
-      XSIG_LL(accumulator) += fixed_arg;
-
-      /* The basic computation is complete. Now fix the answer to
-	 compensate for the error due to the approximation used for
-	 pi/2
-	 */
-
-      /* This has an exponent of -65 */
-      XSIG_LL(fix_up) = BX_CONST64(0x898cc51701b839a2);
-      fix_up.lsw = 0;
-
-      /* The fix-up needs to be improved for larger args */
-      if ( argSqrd.msw & 0xffc00000 )
-	{
-	  /* Get about 32 bit precision in these: */
-	  fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
-	  fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
-	}
-
-      exp2 += norm_Xsig(&accumulator);
-      shr_Xsig(&accumulator, 1); /* Prevent overflow */
-      exp2++;
-      shr_Xsig(&fix_up, 65 + exp2);
-
-      add_Xsig_Xsig(&accumulator, &fix_up);
-
-      echange = round_Xsig(&accumulator);
-
-      setexponentpos(&result, exp2 + echange);
-      significand(&result) = XSIG_LL(accumulator);
-    }
-
-  FPU_copy_to_reg0(&result, TAG_Valid);
-
-#ifdef PARANOID
-  if ( (exponent(&result) >= 0)
-      && (significand(&result) > BX_CONST64(0x8000000000000000)) )
-    {
-      INTERNAL(0x151);
-    }
-#endif /* PARANOID */
-
-}
--- a/bochs/fpu/poly_tan.c
+++ b/bochs/fpu/poly_tan.c
@ -1,160 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  poly_tan.c                                                               |
- |  $Id: poly_tan.c,v 1.5 2003-05-15 16:19:39 sshwarts Exp $
- |                                                                           |
- | Compute the tan of a FPU_REG, using a polynomial approximation.           |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997,1999                                    |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@melbpc.org.au            |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_emu.h"
-#include "fpu_system.h"
-#include "control_w.h"
-#include "poly.h"
-
-#define	HiPOWERop	3	/* odd poly, positive terms */
-static const u64 oddplterm[HiPOWERop] =
-{
-  BX_CONST64(0x0000000000000000),
-  BX_CONST64(0x0051a1cf08fca228),
-  BX_CONST64(0x0000000071284ff7)
-};
-
-#define	HiPOWERon	2	/* odd poly, negative terms */
-static const u64 oddnegterm[HiPOWERon] =
-{
-   BX_CONST64(0x1291a9a184244e80),
-   BX_CONST64(0x0000583245819c21)
-};
-
-#define	HiPOWERep	2	/* even poly, positive terms */
-static const u64 evenplterm[HiPOWERep] =
-{
-  BX_CONST64(0x0e848884b539e888),
-  BX_CONST64(0x00003c7f18b887da)
-};
-
-#define	HiPOWERen	2	/* even poly, negative terms */
-static const u64 evennegterm[HiPOWERen] =
-{
-  BX_CONST64(0xf1f0200fd51569cc),
-  BX_CONST64(0x003afb46105c4432)
-};
-
-static const u64 twothirds = BX_CONST64(0xaaaaaaaaaaaaaaab);
-
-
-/*--- poly_tan() ------------------------------------------------------------+
- |                                                                           |
- +---------------------------------------------------------------------------*/
-void   poly_tan(FPU_REG *st0_ptr, int invert)
-{
-  s32         exponent;
-  Xsig        argSq, argSqSq, accumulatoro, accumulatore, accum,
-              argSignif;
-
-  exponent = exponent(st0_ptr);
-
-
-#ifdef PARANOID
-  if ( signnegative(st0_ptr) )	/* Can't hack a number < 0.0 */
-    { arith_invalid(0); return; }  /* Need a positive number */
-#endif /* PARANOID */
-
-  if ( (exponent >= 0)
-       || ((exponent == -1) && (st0_ptr->sigh > 0xc90fdaa2)) )
-    {
-    EXCEPTION(0x250);
-    }
-  else
-    {
-      argSignif.lsw = 0;
-      XSIG_LL(accum) = XSIG_LL(argSignif) = significand(st0_ptr);
- 
-      if ( exponent < -1 )
-	{
-	  /* shift the argument right by the required places */
-	  if ( FPU_shrx(&XSIG_LL(accum), -1-exponent) >= 0x80000000U )
-	    XSIG_LL(accum) ++;	/* round up */
-	}
-    }
-
-  XSIG_LL(argSq) = XSIG_LL(accum); argSq.lsw = accum.lsw;
-  mul_Xsig_Xsig(&argSq, &argSq);
-  XSIG_LL(argSqSq) = XSIG_LL(argSq); argSqSq.lsw = argSq.lsw;
-  mul_Xsig_Xsig(&argSqSq, &argSqSq);
-
-  /* Compute the negative terms for the numerator polynomial */
-  accumulatoro.msw = accumulatoro.midw = accumulatoro.lsw = 0;
-  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddnegterm, HiPOWERon-1);
-  mul_Xsig_Xsig(&accumulatoro, &argSq);
-  negate_Xsig(&accumulatoro);
-  /* Add the positive terms */
-  polynomial_Xsig(&accumulatoro, &XSIG_LL(argSqSq), oddplterm, HiPOWERop-1);
-
-  
-  /* Compute the positive terms for the denominator polynomial */
-  accumulatore.msw = accumulatore.midw = accumulatore.lsw = 0;
-  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evenplterm, HiPOWERep-1);
-  mul_Xsig_Xsig(&accumulatore, &argSq);
-  negate_Xsig(&accumulatore);
-  /* Add the negative terms */
-  polynomial_Xsig(&accumulatore, &XSIG_LL(argSqSq), evennegterm, HiPOWERen-1);
-  /* Multiply by arg^2 */
-  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
-  mul64_Xsig(&accumulatore, &XSIG_LL(argSignif));
-  /* de-normalize and divide by 2 */
-  shr_Xsig(&accumulatore, -2*(1+exponent) + 1);
-  negate_Xsig(&accumulatore);      /* This does 1 - accumulator */
-
-  /* Now find the ratio. */
-  if ( accumulatore.msw == 0 )
-    {
-      /* accumulatoro must contain 1.0 here, (actually, 0) but it
-	 really doesn't matter what value we use because it will
-	 have negligible effect in later calculations
-	 */
-      XSIG_LL(accum) = BX_CONST64(0x8000000000000000);
-      accum.lsw = 0;
-    }
-  else
-    {
-      div_Xsig(&accumulatoro, &accumulatore, &accum);
-    }
-
-  /* Multiply by 1/3 * arg^3 */
-  mul64_Xsig(&accum, &XSIG_LL(argSignif));
-  mul64_Xsig(&accum, &XSIG_LL(argSignif));
-  mul64_Xsig(&accum, &XSIG_LL(argSignif));
-  mul64_Xsig(&accum, &twothirds);
-  shr_Xsig(&accum, -2*(exponent+1));
-
-
-  /* tan(arg) = arg + accum */
-  add_two_Xsig(&accum, &argSignif, &exponent);
-
-  if ( invert )
-    {
-      /* accum now contains tan(pi/2 - arg).
-	 Use tan(arg) = 1.0 / tan(pi/2 - arg)
-	 */
-      accumulatoro.lsw = accumulatoro.midw = 0;
-      accumulatoro.msw = 0x80000000;
-      div_Xsig(&accumulatoro, &accum, &accum);
-      exponent = - exponent;
-    }
-
-
-  /* Transfer the result */
-  exponent += round_Xsig(&accum);
-  FPU_settag0(TAG_Valid);
-  significand(st0_ptr) = XSIG_LL(accum);
-  setexponent16(st0_ptr, exponent + EXTENDED_Ebias);  /* Result is positive. */
-
-}
--- a/bochs/fpu/polynom_Xsig.c
+++ b/bochs/fpu/polynom_Xsig.c
@ -1,133 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  polynomial_Xsig.c                                                        |
- |  $Id: polynom_Xsig.c,v 1.2 2001-10-06 03:53:46 bdenney Exp $
- |                                                                           |
- | Fixed point arithmetic polynomial evaluation.                             |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1995,1999                                    |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- | Computes:                                                                 |
- | terms[0] + (terms[1] + (terms[2] + ... + (terms[n]*x)*x)*x)*x) ... )*x    |
- | and adds the result to the 12 byte Xsig.                                  |
- | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
- | precision.                                                                |
- |                                                                           |
- | This function must be used carefully: most overflow of intermediate       |
- | results is controlled, but overflow of the result is not.                 |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_emu.h"
-#include "poly.h"
-
-
-void polynomial_Xsig(Xsig *accum, const u64 *x, const u64 terms[], const int n)
-{
-  int  i;
-  Xsig acc, Xprod;
-  u32  lprod;
-  u64  xlwr, xupr, prod;
-  char overflowed;
-
-  xlwr = (u32)(*x);
-  xupr = (u32)((*x) >> 32);
-
-  acc.msw = terms[n] >> 32;
-  acc.midw = terms[n];
-  acc.lsw = 0;
-  overflowed = 0;
-
-  for ( i = n-1; i >= 0; i-- )
-    {
-      /* Split the product into five parts to get a 16 byte result */
-
-      /* first word by first word */
-      prod = acc.msw * xupr;
-      Xprod.midw = prod;
-      Xprod.msw = prod >> 32;
-
-      /* first word by second word */
-      prod = acc.msw * xlwr;
-      Xprod.lsw = prod;
-      lprod = prod >> 32;
-      Xprod.midw += lprod;
-      if ( lprod > Xprod.midw )
-	Xprod.msw ++;
-
-      /* second word by first word */
-      prod = acc.midw * xupr;
-      Xprod.lsw += prod;
-      if ( (u32)prod > Xprod.lsw )
-	{
-	  Xprod.midw ++;
-	  if ( Xprod.midw == 0 )
-	    Xprod.msw ++;
-	}
-      lprod = prod >> 32;
-      Xprod.midw += lprod;
-      if ( lprod > Xprod.midw )
-	Xprod.msw ++;
-
-      /* second word by second word */
-      prod = acc.midw * xlwr;
-      lprod = prod >> 32;
-      Xprod.lsw += lprod;
-      if ( lprod > Xprod.lsw )
-	{
-	  Xprod.midw ++;
-	  if ( Xprod.midw == 0 )
-	    Xprod.msw ++;
-	}
-
-      /* third word by first word */
-      prod = acc.lsw * xupr;
-      lprod = prod >> 32;
-      Xprod.lsw += lprod;
-      if ( lprod > Xprod.lsw )
-	{
-	  Xprod.midw ++;
-	  if ( Xprod.midw == 0 )
-	    Xprod.msw ++;
-	}
-
-      if ( overflowed )
-	{
-	  Xprod.midw += xlwr;
-	  if ( (u32)xlwr > Xprod.midw )
-	    Xprod.msw ++;
-	  Xprod.msw += xupr;
-	  overflowed = 0;    /* We don't check this addition for overflow */
-	}
-      
-      acc.lsw = Xprod.lsw;
-      acc.midw = (u32)terms[i] + Xprod.midw;
-      acc.msw = (terms[i] >> 32) + Xprod.msw;
-      if ( Xprod.msw > acc.msw )
-	overflowed = 1;
-      if ( (u32)terms[i] > acc.midw )
-	{
-	  acc.msw ++;
-	  if ( acc.msw == 0 )
-	    overflowed = 1;
-	}
-    }
-
-  /* We don't check the addition to accum for overflow */
-  accum->lsw += acc.lsw;
-  if ( acc.lsw > accum->lsw )
-    {
-      accum->midw ++;
-      if ( accum->midw == 0 )
-	accum->msw ++;
-    }
-  accum->midw += acc.midw;
-  if ( acc.midw > accum->midw )
-    {
-      accum->msw ++;
-    }
-  accum->msw += acc.msw;
-}
-
-
--- a/bochs/fpu/reg_add_sub.c
+++ b/bochs/fpu/reg_add_sub.c
@ -1,424 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_add_sub.c                                                            |
- |  $Id: reg_add_sub.c,v 1.9 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Functions to add or subtract two registers and put the result in a third. |
- |                                                                           |
- | Copyright (C) 1992,1993,1997                                              |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- |  For each function, the destination may be any FPU_REG, including one of  |
- | the source FPU_REGs.                                                      |
- |  Each function returns 0 if the answer is o.k., otherwise a non-zero      |
- | value is returned, indicating either an exception condition or an         |
- | internal error.                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_emu.h"
-#include "control_w.h"
-#include "fpu_system.h"
-
-static
-int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
-		     FPU_REG const *b, u_char tagb, u_char signb,
-		     FPU_REG *dest, int deststnr, u16 control_w);
-
-/*
-  Operates on st(0) and st(n), or on st(0) and temporary data.
-  The destination must be one of the source st(x).
-  */
-int FPU_add(FPU_REG const *b, u_char tagb, int deststnr, u16 control_w)
-{
-  FPU_REG *a = &st(0);
-  FPU_REG *dest = &st(deststnr);
-  u_char signb = getsign(b);
-  u_char taga = FPU_gettag0();
-  u_char signa = getsign(a);
-  u_char saved_sign = getsign(dest);
-  int diff, tag, expa, expb;
-  
-  if (!(taga | tagb))
-    {
-      expa = exponent(a);
-      expb = exponent(b);
-
-    valid_add:
-      /* Both registers are valid */
-      if (!(signa ^ signb))
-	{
-	  /* signs are the same */
-	  tag = FPU_u_add(a, b, dest, control_w, signa, expa, expb);
-	}
-      else
-	{
-	  /* The signs are different, so do a subtraction */
-	  diff = expa - expb;
-	  if (!diff)
-	    {
-	      diff = a->sigh - b->sigh;  /* This works only if the ms bits
-					    are identical. */
-	      if (!diff)
-		{
-		  diff = a->sigl > b->sigl;
-		  if (!diff)
-		    diff = -(a->sigl < b->sigl);
-		}
-	    }
-      
-	  if (diff > 0)
-	    {
-	      tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
-	    }
-	  else if (diff < 0)
-	    {
-	      tag = FPU_u_sub(b, a, dest, control_w, signb, expb, expa);
-	    }
-	  else
-	    {
-	      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-	      /* sign depends upon rounding mode */
-	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
-		      ? SIGN_POS : SIGN_NEG);
-	      return TAG_Zero;
-	    }
-	}
-
-      if (tag < 0)
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
-	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-
-  if (taga == TAG_Special)
-    taga = FPU_Special(a);
-  if (tagb == TAG_Special)
-    tagb = FPU_Special(b);
-
-  if (((taga == TAG_Valid) && (tagb == TW_Denormal))
-	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)))
-    {
-      FPU_REG x, y;
-
-      if (denormal_operand() < 0)
-	return FPU_Exception;
-
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      a = &x;
-      b = &y;
-      expa = exponent16(a);
-      expb = exponent16(b);
-      goto valid_add;
-    }
-
-  if ((taga == TW_NaN) || (tagb == TW_NaN))
-    {
-      if (deststnr == 0)
-	return real_2op_NaN(b, tagb, deststnr, a);
-      else
-	return real_2op_NaN(a, taga, deststnr, a);
-    }
-
-  return add_sub_specials(a, taga, signa, b, tagb, signb,
-			  dest, deststnr, control_w);
-}
-
-
-/* Subtract b from a.  (a-b) -> dest
- bbd: arg2 used to be int type, but sometimes pointers were forced
- in with typecasts.  On Alphas pointers are 64 bits and ints are 32,
- so when rm was cast back to a pointer...SEGFAULT.  Pass the pointers
- around instead, since they are always larger precision than the
- register numbers. */
-int FPU_sub(int flags, FPU_REG *rm, u16 control_w)
-{
-  FPU_REG const *a, *b;
-  FPU_REG *dest;
-  u_char taga, tagb, signa, signb, saved_sign, sign;
-  int diff, tag, expa, expb, deststnr;
-
-  a = &st(0);
-  taga = FPU_gettag0();
-
-  deststnr = 0;
-  if (flags & LOADED)
-    {
-      b = rm;
-      tagb = flags & 0x0f;
-    }
-  else
-    {
-      int rmint = PTR2INT(rm);
-      b = &st(rmint);
-      tagb = FPU_gettagi(rmint);
-
-      if (flags & DEST_RM)
-	deststnr = rmint;
-    }
-
-  signa = getsign(a);
-  signb = getsign(b);
-
-  if (flags & REV)
-    {
-      signa ^= SIGN_NEG;
-      signb ^= SIGN_NEG;
-    }
-
-  dest = &st(deststnr);
-  saved_sign = getsign(dest);
-
-  if (!(taga | tagb))
-    {
-      expa = exponent(a);
-      expb = exponent(b);
-
-    valid_subtract:
-      /* Both registers are valid */
-
-      diff = expa - expb;
-
-      if (!diff)
-	{
-	  diff = a->sigh - b->sigh;  /* Works only if ms bits are identical */
-	  if (!diff)
-	    {
-	      diff = a->sigl > b->sigl;
-	      if (!diff)
-		diff = -(a->sigl < b->sigl);
-	    }
-	}
-
-      switch ((((int)signa)*2 + signb) / SIGN_NEG)
-	{
-	case 0: /* P - P */
-	case 3: /* N - N */
-	  if (diff > 0)
-	    {
-	      /* |a| > |b| */
-	      tag = FPU_u_sub(a, b, dest, control_w, signa, expa, expb);
-	    }
-	  else if (diff == 0)
-	    {
-	      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-
-	      /* sign depends upon rounding mode */
-	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
-		? SIGN_POS : SIGN_NEG);
-	      return TAG_Zero;
-	    }
-	  else
-	    {
-	      sign = signa ^ SIGN_NEG;
-	      tag = FPU_u_sub(b, a, dest, control_w, sign, expb, expa);
-	    }
-	  break;
-	case 1: /* P - N */
-	  tag = FPU_u_add(a, b, dest, control_w, SIGN_POS, expa, expb);
-	  break;
-	case 2: /* N - P */
-	  tag = FPU_u_add(a, b, dest, control_w, SIGN_NEG, expa, expb);
-	  break;
-#ifdef PARANOID
-	default:
-	  INTERNAL(0x111);
-	  return -1;
-#endif
-	}
-      if (tag < 0)
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
-	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-
-  if (taga == TAG_Special)
-    taga = FPU_Special(a);
-  if (tagb == TAG_Special)
-    tagb = FPU_Special(b);
-
-  if (((taga == TAG_Valid) && (tagb == TW_Denormal))
-	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)))
-    {
-      FPU_REG x, y;
-
-      if (denormal_operand() < 0)
-	return FPU_Exception;
-
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      a = &x;
-      b = &y;
-      expa = exponent16(a);
-      expb = exponent16(b);
-
-      goto valid_subtract;
-    }
-
-  if ((taga == TW_NaN) || (tagb == TW_NaN))
-    {
-      FPU_REG const *d1, *d2;
-      if (flags & REV)
-	{
-	  d1 = b;
-	  d2 = a;
-	}
-      else
-	{
-	  d1 = a;
-	  d2 = b;
-	}
-      if (flags & LOADED)
-	return real_2op_NaN(b, tagb, deststnr, d1);
-      if (flags & DEST_RM)
-	return real_2op_NaN(a, taga, deststnr, d2);
-      else
-	return real_2op_NaN(b, tagb, deststnr, d2);
-    }
-
-    return add_sub_specials(a, taga, signa, b, tagb, signb ^ SIGN_NEG,
-			    dest, deststnr, control_w);
-}
-
-
-static
-int add_sub_specials_core(FPU_REG const *a, u_char taga, u_char signa,
-		     FPU_REG const *b, u_char tagb, u_char signb,
-		     FPU_REG *dest, int deststnr, u16 control_w)
-{
-  if (((taga == TW_Denormal) || (tagb == TW_Denormal))
-       && (denormal_operand() < 0))
-    return FPU_Exception;
-
-  if (taga == TAG_Zero && tagb != TW_Infinity)
-    {
-      if (tagb == TAG_Zero)
-	{
-	  /* Both are zero, result will be zero. */
-	  u_char different_signs = signa ^ signb;
-
-	  FPU_copy_to_regi(a, TAG_Zero, deststnr);
-	  if (different_signs)
-	    {
-	      /* Signs are different. */
-	      /* Sign of answer depends upon rounding mode. */
-	      setsign(dest, ((control_w & CW_RC) != RC_DOWN)
-		      ? SIGN_POS : SIGN_NEG);
-	    }
-	  else
-	    setsign(dest, signa);  /* signa may differ from the sign of a. */
-	  return TAG_Zero;
-	}
-      else
-	{
-	  reg_copy(b, dest);
-	  if ((tagb == TW_Denormal) && (b->sigh & 0x80000000))
-	    {
-	      /* A pseudoDenormal, convert it. */
-	      addexponent(dest, 1);
-	      tagb = TAG_Valid;
-	    }
-	  else if (tagb > TAG_Empty)
-	    tagb = TAG_Special;
-	  setsign(dest, signb);  /* signb may differ from the sign of b. */
-	  FPU_settagi(deststnr, tagb);
-	  return tagb;
-	}
-    }
-  else if (tagb == TAG_Zero && taga != TW_Infinity)
-    {
-      reg_copy(a, dest);
-      if ((taga == TW_Denormal) && (a->sigh & 0x80000000))
-	{
-	  /* A pseudoDenormal */
-	  addexponent(dest, 1);
-	  taga = TAG_Valid;
-	}
-      else if (taga > TAG_Empty)
-	taga = TAG_Special;
-      setsign(dest, signa);  /* signa may differ from the sign of a. */
-      FPU_settagi(deststnr, taga);
-      return taga;
-    }
-  else if (taga == TW_Infinity)
-    {
-      if ((tagb != TW_Infinity) || (signa == signb))
-	{
-	  FPU_copy_to_regi(a, TAG_Special, deststnr);
-	  setsign(dest, signa);  /* signa may differ from the sign of a. */
-	  return taga;
-	}
-      /* Infinity-Infinity is undefined. */
-      return arith_invalid(deststnr);
-    }
-  else if (tagb == TW_Infinity)
-    {
-      FPU_copy_to_regi(b, TAG_Special, deststnr);
-      setsign(dest, signb);  /* signb may differ from the sign of b. */
-      return tagb;
-    }
-
-#ifdef PARANOID
-  INTERNAL(0x101);
-#endif
-
-  return FPU_Exception;
-}
-
-static
-int add_sub_specials(FPU_REG const *a, u_char taga, u_char signa,
-                     FPU_REG const *b, u_char tagb, u_char signb,
-                     FPU_REG *dest, int deststnr, u16 control_w)
-{
-    int tag = add_sub_specials_core(a, taga, signa, b, tagb, signb, dest, deststnr, control_w);
-    int sign = dest->exp < 0;
-    FPU_REG unrounded;
-    
-    /* no adjustment needed for add/sub zero with full precision */
-    if ((control_w & CW_PC) == PR_64_BITS)
-        if (taga == TAG_Zero || tagb == TAG_Zero) return tag;
-        
-    /* no adjustment needed for zero result */
-    if (tag == TAG_Zero) return tag;
-        
-    /* no adjustment needed for infinity result */
-    if (tag == TW_Infinity) return tag;
-        
-    /* no adjustment needed for real indefinite result */
-    if ((dest->exp & 0x7FFF) == 0x7FFF)
-        if (dest->sigh == 0xC0000000)
-            if (dest->sigl == 0)
-                return tag;
-                
-     unrounded = *dest;
-     dest->exp &= 0x7FFF;
-     dest->exp -= EXTENDED_Ebias;
-                
-     /* if denormal, some adjustments are required before passing to FPU_round */
-     if ((dest->sigh & 0x80000000) == 0) 
-     {
-         dest->exp++;
-         tag = FPU_round (dest, 0, control_w, sign);
-         if ((dest->sigh & 0x80000000) == 0) 
-             dest->exp--;
-         if ((dest->exp & 0x7FFF) == 0) 
-             if (dest->exp != unrounded.exp || dest->sigh != unrounded.sigh || dest->sigl != unrounded.sigl)
-                 EXCEPTION(EX_Underflow);
-     }
-     else tag = FPU_round (dest, 0, control_w, sign);
-     return tag;
-}
--- a/bochs/fpu/reg_compare.c
+++ b/bochs/fpu/reg_compare.c
@ -1,388 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_compare.c                                                            |
- |  $Id: reg_compare.c,v 1.6 2003-10-04 12:32:56 sshwarts Exp $
- |                                                                           |
- | Compare two floating point registers                                      |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | compare() is the core FPU_REG comparison function                         |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "exception.h"
-#include "fpu_emu.h"
-#include "control_w.h"
-#include "status_w.h"
-
-
-static int compare(FPU_REG const *b, int tagb)
-{
-  int diff, exp0, expb;
-  u_char	  	st0_tag;
-  FPU_REG  	*st0_ptr;
-  FPU_REG	x, y;
-  u_char		st0_sign, signb = getsign(b);
-
-  st0_ptr = &st(0);
-  st0_tag = FPU_gettag0();
-  st0_sign = getsign(st0_ptr);
-
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
-  if ( st0_tag == TAG_Special )
-    st0_tag = FPU_Special(st0_ptr);
-
-  if ( ((st0_tag != TAG_Valid) && (st0_tag != TW_Denormal))
-       || ((tagb != TAG_Valid) && (tagb != TW_Denormal)) )
-    {
-      if ( st0_tag == TAG_Zero )
-	{
-	  if ( tagb == TAG_Zero ) return COMP_A_eq_B;
-	  if ( tagb == TAG_Valid )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
-	  if ( tagb == TW_Denormal )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
-	    | COMP_Denormal;
-	}
-      else if ( tagb == TAG_Zero )
-	{
-	  if ( st0_tag == TAG_Valid )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
-	  if ( st0_tag == TW_Denormal )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	    | COMP_Denormal;
-	}
-
-      if ( st0_tag == TW_Infinity )
-	{
-	  if ( (tagb == TAG_Valid) || (tagb == TAG_Zero) )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
-	  else if ( tagb == TW_Denormal )
-	    return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	      | COMP_Denormal;
-	  else if ( tagb == TW_Infinity )
-	    {
-	      /* The 80486 book says that infinities can be equal! */
-	      return (st0_sign == signb) ? COMP_A_eq_B :
-		((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B);
-	    }
-	  /* Fall through to the NaN code */
-	}
-      else if ( tagb == TW_Infinity )
-	{
-	  if ( (st0_tag == TAG_Valid) || (st0_tag == TAG_Zero) )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B);
-	  if ( st0_tag == TW_Denormal )
-	    return ((signb == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
-		| COMP_Denormal;
-	  /* Fall through to the NaN code */
-	}
-
-      /* The only possibility now should be that one of the arguments
-	 is a NaN */
-      if ( (st0_tag == TW_NaN) || (tagb == TW_NaN) )
-	{
-	  int signalling = 0, unsupported = 0;
-	  if ( st0_tag == TW_NaN )
-	    {
-	      signalling = (st0_ptr->sigh & 0xc0000000) == 0x80000000;
-	      unsupported = !((exponent(st0_ptr) == EXP_OVER)
-			      && (st0_ptr->sigh & 0x80000000));
-	    }
-	  if ( tagb == TW_NaN )
-	    {
-	      signalling |= (b->sigh & 0xc0000000) == 0x80000000;
-	      unsupported |= !((exponent(b) == EXP_OVER)
-			       && (b->sigh & 0x80000000));
-	    }
-	  if ( signalling || unsupported )
-	    return COMP_No_Comp | COMP_SNaN | COMP_NaN;
-	  else
-	    /* Neither is a signaling NaN */
-	    return COMP_No_Comp | COMP_NaN;
-	}
-      
-      EXCEPTION(EX_Invalid);
-    }
-  
-  if (st0_sign != signb)
-    {
-      return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	    COMP_Denormal : 0);
-    }
-
-  if ( (st0_tag == TW_Denormal) || (tagb == TW_Denormal) )
-    {
-      FPU_to_exp16(st0_ptr, &x);
-      FPU_to_exp16(b, &y);
-      st0_ptr = &x;
-      b = &y;
-      exp0 = exponent16(st0_ptr);
-      expb = exponent16(b);
-    }
-  else
-    {
-      exp0 = exponent(st0_ptr);
-      expb = exponent(b);
-    }
-
-#ifdef PARANOID
-  if (!(st0_ptr->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
-  if (!(b->sigh & 0x80000000)) EXCEPTION(EX_Invalid);
-#endif /* PARANOID */
-
-  diff = exp0 - expb;
-  if ( diff == 0 )
-    {
-      diff = st0_ptr->sigh - b->sigh;  /* Works only if ms bits are
-					      identical */
-      if ( diff == 0 )
-	{
-	diff = st0_ptr->sigl > b->sigl;
-	if ( diff == 0 )
-	  diff = -(st0_ptr->sigl < b->sigl);
-	}
-    }
-
-  if ( diff > 0 )
-    {
-      return ((st0_sign == SIGN_POS) ? COMP_A_gt_B : COMP_A_lt_B)
-	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	    COMP_Denormal : 0);
-    }
-  if ( diff < 0 )
-    {
-      return ((st0_sign == SIGN_POS) ? COMP_A_lt_B : COMP_A_gt_B)
-	| ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	    COMP_Denormal : 0);
-    }
-
-  return COMP_A_eq_B
-    | ( ((st0_tag == TW_Denormal) || (tagb == TW_Denormal)) ?
-	COMP_Denormal : 0);
-
-}
-
-
-/* This function requires that st(0) is not empty */
-int FPU_compare_st_data(FPU_REG const *loaded_data, u_char loaded_tag)
-{
-  int f, c;
-
-  c = compare(loaded_data, loaded_tag);
-
-  if (c & COMP_NaN)
-    {
-      EXCEPTION(EX_Invalid);
-      f = SW_C3 | SW_C2 | SW_C0;
-    }
-  else
-    switch (c & 7)
-      {
-      case COMP_A_lt_B:
-	f = SW_C0;
-	break;
-      case COMP_A_eq_B:
-	f = SW_C3;
-	break;
-      case COMP_A_gt_B:
-	f = 0;
-	break;
-      case COMP_No_Comp:
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
-#ifdef PARANOID
-      default:
-	INTERNAL(0x121);
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
-#endif /* PARANOID */
-      }
-  setcc(f);
-  if (c & COMP_Denormal)
-    {
-      return denormal_operand() < 0;
-    }
-  return 0;
-}
-
-
-static int compare_st_st(int nr)
-{
-  int f, c;
-  FPU_REG *st_ptr;
-
-  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      /* Stack fault */
-      EXCEPTION(EX_StackUnder);
-      return !(FPU_control_word & CW_Invalid);
-    }
-
-  st_ptr = &st(nr);
-  c = compare(st_ptr, FPU_gettagi(nr));
-  if (c & COMP_NaN)
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      EXCEPTION(EX_Invalid);
-      return !(FPU_control_word & CW_Invalid);
-    }
-  else
-    switch (c & 7)
-      {
-      case COMP_A_lt_B:
-	f = SW_C0;
-	break;
-      case COMP_A_eq_B:
-	f = SW_C3;
-	break;
-      case COMP_A_gt_B:
-	f = 0;
-	break;
-      case COMP_No_Comp:
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
-#ifdef PARANOID
-      default:
-	INTERNAL(0x122);
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
-#endif /* PARANOID */
-      }
-  setcc(f);
-  if (c & COMP_Denormal)
-    {
-      return denormal_operand() < 0;
-    }
-  return 0;
-}
-
-
-static int compare_u_st_st(int nr)
-{
-  int f, c;
-  FPU_REG *st_ptr;
-
-  if ( !NOT_EMPTY(0) || !NOT_EMPTY(nr) )
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      /* Stack fault */
-      EXCEPTION(EX_StackUnder);
-      return !(FPU_control_word & CW_Invalid);
-    }
-
-  st_ptr = &st(nr);
-  c = compare(st_ptr, FPU_gettagi(nr));
-  if (c & COMP_NaN)
-    {
-      setcc(SW_C3 | SW_C2 | SW_C0);
-      if (c & COMP_SNaN)       /* This is the only difference between
-				  un-ordered and ordinary comparisons */
-	{
-	  EXCEPTION(EX_Invalid);
-	  return !(FPU_control_word & CW_Invalid);
-	}
-      return 0;
-    }
-  else
-    switch (c & 7)
-      {
-      case COMP_A_lt_B:
-	f = SW_C0;
-	break;
-      case COMP_A_eq_B:
-	f = SW_C3;
-	break;
-      case COMP_A_gt_B:
-	f = 0;
-	break;
-      case COMP_No_Comp:
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
-#ifdef PARANOID
-      default:
-	INTERNAL(0x123);
-	f = SW_C3 | SW_C2 | SW_C0;
-	break;
-#endif /* PARANOID */
-      }
-  setcc(f);
-  if (c & COMP_Denormal)
-    {
-      return denormal_operand() < 0;
-    }
-  return 0;
-}
-
-/*---------------------------------------------------------------------------*/
-
-void fcom_st()
-{
-  /* fcom st(i) */
-  compare_st_st(FPU_rm);
-}
-
-
-void fcompst()
-{
-  /* fcomp st(i) */
-  if ( !compare_st_st(FPU_rm) )
-    FPU_pop();
-}
-
-
-void fcompp()
-{
-  /* fcompp */
-  if (FPU_rm != 1)
-    {
-      FPU_illegal();
-      return;
-    }
-  if (!compare_st_st(1))
-  {
-      FPU_pop();
-      FPU_pop();
-  }
-}
-
-
-void fucom_()
-{
-  /* fucom st(i) */
-  compare_u_st_st(FPU_rm);
-
-}
-
-
-void fucomp()
-{
-  /* fucomp st(i) */
-  if ( !compare_u_st_st(FPU_rm) )
-    FPU_pop();
-}
-
-
-void fucompp()
-{
-  /* fucompp */
-  if (FPU_rm == 1)
-    {
-      if (!compare_u_st_st(1))
-      {
-          FPU_pop();
-          FPU_pop();
-      }
-    }
-  else
-    FPU_illegal();
-}
--- a/bochs/fpu/reg_constant.c
+++ b/bochs/fpu/reg_constant.c
@ -1,116 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_constant.c                                                           |
- |  $Id: reg_constant.c,v 1.7 2004-03-05 09:19:58 sshwarts Exp $
- |                                                                           |
- | All of the constant FPU_REGs                                              |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1997                                         |
- |                     W. Metzenthen, 22 Parker St, Ormond, Vic 3163,        |
- |                     Australia.  E-mail   billm@suburbia.net               |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_system.h"
-#include "fpu_emu.h"
-#include "status_w.h"
-#include "reg_constant.h"
-#include "control_w.h"
-
-FPU_REG const CONST_1    = MAKE_REG(POS, 0, 0x00000000, 0x80000000);
-FPU_REG const CONST_L2T  = MAKE_REG(POS, 1, 0xcd1b8afe, 0xd49a784b);
-FPU_REG const CONST_L2E  = MAKE_REG(POS, 0, 0x5c17f0bc, 0xb8aa3b29);
-FPU_REG const CONST_PI   = MAKE_REG(POS, 1, 0x2168c235, 0xc90fdaa2);
-/* bbd: make CONST_PI2 non-const so that you can write "&CONST_PI2" when
-   calling a function.  Otherwise you get const warnings.  Surely there's
-   a better way. */
-FPU_REG CONST_PI2  = MAKE_REG(POS, 0, 0x2168c235, 0xc90fdaa2);
-FPU_REG const CONST_PI4  = MAKE_REG(POS, -1, 0x2168c235, 0xc90fdaa2);
-FPU_REG const CONST_LG2  = MAKE_REG(POS, -2, 0xfbcff799, 0x9a209a84);
-FPU_REG const CONST_LN2  = MAKE_REG(POS, -1, 0xd1cf79ac, 0xb17217f7);
-
-/* Extra bits to take pi/2 to more than 128 bits precision. */
-FPU_REG const CONST_PI2extra = MAKE_REG(NEG, -66,
-					 0xfc8f8cbb, 0xece675d1);
-
-/* Only the sign (and tag) is used in internal zeroes */
-FPU_REG const CONST_Z    = MAKE_REG(POS, EXP_UNDER, 0x0, 0x0);
-
-/* Only the sign and significand (and tag) are used in internal NaNs */
-/* The 80486 never generates one of these 
-FPU_REG const CONST_SNAN = MAKE_REG(POS, EXP_OVER, 0x00000001, 0x80000000);
- */
-/* This is the real indefinite QNaN */
-FPU_REG const CONST_QNaN = MAKE_REG(NEG, EXP_OVER, 0x00000000, 0xC0000000);
-
-/* Only the sign (and tag) is used in internal infinities */
-FPU_REG const CONST_INF  = MAKE_REG(POS, EXP_OVER, 0x00000000, 0x80000000);
-
-
-static void fld_const(FPU_REG const *c, int adj, u_char tag)
-{
-  FPU_REG *st_new_ptr;
-
-  if (FPU_stackoverflow(&st_new_ptr))
-    {
-      FPU_stack_overflow();
-      return;
-    }
-  FPU_push();
-  reg_copy(c, st_new_ptr);
-  st_new_ptr->sigl += adj;  /* For all our fldxxx constants, we don't need to
-			       borrow or carry. */
-  FPU_settag0(tag);
-  clear_C1();
-}
-
-/* A fast way to find out whether x is one of RC_DOWN or RC_CHOP
-   (and not one of RC_RND or RC_UP).
-   */
-#define DOWN_OR_CHOP(x)  (x & RC_DOWN)
-
-static void fld1(int rc)
-{
-  fld_const(&CONST_1, 0, TAG_Valid);
-}
-
-static void fldl2t(int rc)
-{
-  fld_const(&CONST_L2T, (rc == RC_UP) ? 1 : 0, TAG_Valid);
-}
-
-static void fldl2e(int rc)
-{
-  fld_const(&CONST_L2E, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
-}
-
-static void fldpi(int rc)
-{
-  fld_const(&CONST_PI, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
-}
-
-static void fldlg2(int rc)
-{
-  fld_const(&CONST_LG2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
-}
-
-static void fldln2(int rc)
-{
-  fld_const(&CONST_LN2, DOWN_OR_CHOP(rc) ? -1 : 0, TAG_Valid);
-}
-
-static void fldz(int rc)
-{
-  fld_const(&CONST_Z, 0, TAG_Zero);
-}
-
-typedef void (*FUNC_RC)(int);
-
-static FUNC_RC constants_table[] = {
-  fld1, fldl2t, fldl2e, fldpi, fldlg2, fldln2, fldz, (FUNC_RC)FPU_illegal
-};
-
-void fconst(void)
-{
-  (constants_table[FPU_rm])(FPU_control_word & CW_RC);
-}
--- a/bochs/fpu/reg_constant.h
+++ b/bochs/fpu/reg_constant.h
@ -1,34 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_constant.h                                                           |
- |  $Id: reg_constant.h,v 1.6 2004-03-05 09:19:58 sshwarts Exp $
- |                                                                           |
- | Copyright (C) 1992    W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#ifndef _REG_CONSTANT_H_
-#define _REG_CONSTANT_H_
-
-#include "fpu_emu.h"
-
-extern FPU_REG const CONST_1;
-extern FPU_REG const CONST_L2T;
-extern FPU_REG const CONST_L2E;
-extern FPU_REG const CONST_PI;
-
-/*
- * make CONST_PI2 non-const so that you can write "&CONST_PI2" when
- * calling a function.  Otherwise you get const warnings.  Surely there's
- * a better way.
- */
-extern FPU_REG CONST_PI2;
-extern FPU_REG const CONST_PI2extra;
-extern FPU_REG const CONST_PI4;
-extern FPU_REG const CONST_LG2;
-extern FPU_REG const CONST_LN2;
-extern FPU_REG const CONST_Z;
-extern FPU_REG const CONST_INF;
-extern FPU_REG const CONST_QNaN;
-
-#endif /* _REG_CONSTANT_H_ */
--- a/bochs/fpu/reg_convert.c
+++ b/bochs/fpu/reg_convert.c
@ -1,59 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_convert.c                                                            |
- |  $Id: reg_convert.c,v 1.5 2003-10-04 12:32:56 sshwarts Exp $
- |                                                                           |
- |  Convert register representation.                                         |
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1996,1997                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "fpu_emu.h"
-
-
-int  BX_CPP_AttrRegparmN(2)
-FPU_to_exp16(FPU_REG const *a, FPU_REG *x)
-{
-  int sign = getsign(a);
-
-#ifndef EMU_BIG_ENDIAN
-  *(s64 *)&(x->sigl) = *(const s64 *)&(a->sigl);
-#else
-  *(s64 *)&(x->sigh) = *(const s64 *)&(a->sigh);
-#endif
-
-  /* Set up the exponent as a 16 bit quantity. */
-  setexponent16(x, exponent(a));
-
-  if ( exponent16(x) == EXP_UNDER )
-    {
-      /* The number is a de-normal or pseudodenormal. */
-      /* We only deal with the significand and exponent. */
-
-      if (x->sigh & 0x80000000)
-	{
-	  /* Is a pseudodenormal. */
-	  /* This is non-80486 behaviour because the number
-	     loses its 'denormal' identity. */
-	  addexponent(x, 1);
-	}
-      else
-	{
-	  /* Is a denormal. */
-	  addexponent(x, 1);
-          FPU_normalize_nuo(x, 0);
-	}
-    }
-
-  if ( !(x->sigh & 0x80000000) )
-    {
-      INTERNAL(0x180);
-    }
-
-  return sign;
-}
-
--- a/bochs/fpu/reg_divide.c
+++ b/bochs/fpu/reg_divide.c
@ -1,209 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_divide.c                                                             |
- |  $Id: reg_divide.c,v 1.5 2003-10-04 12:32:56 sshwarts Exp $
- |                                                                           |
- | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
- |                                                                           |
- | Copyright (C) 1996                                                        |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@jacobi.maths.monash.edu.au                |
- |                                                                           |
- |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
- |    one was raised, or -1 on internal error.                               |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | The destination may be any FPU_REG, including one of the source FPU_REGs. |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_emu.h"
-#include "fpu_system.h"
-
-/*
-  Divide one register by another and put the result into a third register.
-bbd: arg2 used to be an int, see comments on FPU_sub.
-  */
-int FPU_div(int flags, FPU_REG *rm, int control_w)
-{
-  FPU_REG x, y;
-  FPU_REG const *a, *b, *st0_ptr, *st_ptr;
-  FPU_REG *dest;
-  u_char taga, tagb, signa, signb, sign, saved_sign;
-  int tag, deststnr;
-  int rmint = PTR2INT(rm);
-
-  if ( flags & DEST_RM )
-    deststnr = rmint;
-  else
-    deststnr = 0;
-
-  if ( flags & REV )
-    {
-      b = &st(0);
-      st0_ptr = b;
-      tagb = FPU_gettag0();
-      if ( flags & LOADED )
-	{
-	  a = rm;
-	  taga = flags & 0x0f;
-	}
-      else
-	{
-	  a = &st(rmint);
-	  st_ptr = a;
-	  taga = FPU_gettagi(rmint);
-	}
-    }
-  else
-    {
-      a = &st(0);
-      st0_ptr = a;
-      taga = FPU_gettag0();
-      if ( flags & LOADED )
-	{
-	  b = rm;
-	  tagb = flags & 0x0f;
-	}
-      else
-	{
-	  b = &st(rmint);
-	  st_ptr = b;
-	  tagb = FPU_gettagi(rmint);
-	}
-    }
-
-  signa = getsign(a);
-  signb = getsign(b);
-
-  sign = signa ^ signb;
-
-  dest = &st(deststnr);
-  saved_sign = getsign(dest);
-
-  if ( !(taga | tagb) )
-    {
-      /* Both regs Valid, this should be the most common case. */
-      reg_copy(a, &x);
-      reg_copy(b, &y);
-      setpositive(&x);
-      setpositive(&y);
-      tag = FPU_u_div(&x, &y, dest, control_w, sign);
-
-      if ( tag < 0 )
-	return tag;
-
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-
-  if ( taga == TAG_Special )
-    taga = FPU_Special(a);
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
-
-  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
-	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
-    {
-      if ( denormal_operand() < 0 )
-	return FPU_Exception;
-
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      tag = FPU_u_div(&x, &y, dest, control_w, sign);
-      if ( tag < 0 )
-	return tag;
-
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-  else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
-    {
-      if ( tagb != TAG_Zero )
-	{
-	  /* Want to find Zero/Valid */
-	  if ( tagb == TW_Denormal )
-	    {
-	      if ( denormal_operand() < 0 )
-		return FPU_Exception;
-	    }
-
-	  /* The result is zero. */
-	  FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-	  setsign(dest, sign);
-	  return TAG_Zero;
-	}
-      /* We have an exception condition, either 0/0 or Valid/Zero. */
-      if ( taga == TAG_Zero )
-	{
-	  /* 0/0 */
-	  return arith_invalid(deststnr);
-	}
-      /* Valid/Zero */
-      return FPU_divide_by_zero(deststnr, sign);
-    }
-  /* Must have infinities, NaNs, etc */
-  else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
-    {
-      if ( flags & LOADED )
-	return real_2op_NaN((FPU_REG *)rm, flags & 0x0f, 0, st0_ptr);
-
-      if ( flags & DEST_RM )
-	{
-	  int tag;
-	  tag = FPU_gettag0();
-	  if ( tag == TAG_Special )
-	    tag = FPU_Special(st0_ptr);
-	  return real_2op_NaN(st0_ptr, tag, rmint, (flags & REV) ? st0_ptr : &st(rmint));
-	}
-      else
-	{
-	  int tag;
-	  tag = FPU_gettagi(rmint);
-	  if ( tag == TAG_Special )
-	    tag = FPU_Special(&st(rmint));
-	  return real_2op_NaN(&st(rmint), tag, 0, (flags & REV) ? st0_ptr : &st(rmint));
-	}
-    }
-  else if (taga == TW_Infinity)
-    {
-      if (tagb == TW_Infinity)
-	{
-	  /* infinity/infinity */
-	  return arith_invalid(deststnr);
-	}
-      else
-	{
-	  /* tagb must be Valid or Zero */
-	  if ( (tagb == TW_Denormal) && (denormal_operand() < 0) )
-	    return FPU_Exception;
-	  
-	  /* Infinity divided by Zero or Valid does
-	     not raise and exception, but returns Infinity */
-	  FPU_copy_to_regi(a, TAG_Special, deststnr);
-	  setsign(dest, sign);
-	  return taga;
-	}
-    }
-  else if (tagb == TW_Infinity)
-    {
-      if ( (taga == TW_Denormal) && (denormal_operand() < 0) )
-	return FPU_Exception;
-
-      /* The result is zero. */
-      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-      setsign(dest, sign);
-      return TAG_Zero;
-    }
-#ifdef PARANOID
-  else
-    {
-      INTERNAL(0x102);
-      return FPU_Exception;
-    }
-#endif /* PARANOID */
-
-}
--- a/bochs/fpu/reg_ld_str.c
+++ b/bochs/fpu/reg_ld_str.c
--- a/bochs/fpu/reg_mul.c
+++ b/bochs/fpu/reg_mul.c
@ -1,132 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_mul.c                                                                |
- |  $Id: reg_mul.c,v 1.4 2003-10-04 12:32:56 sshwarts Exp $
- |                                                                           |
- | Multiply one FPU_REG by another, put the result in a destination FPU_REG. |
- |                                                                           |
- | Copyright (C) 1992,1993,1997                                              |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@suburbia.net                              |
- |                                                                           |
- | Returns the tag of the result if no exceptions or errors occurred.        |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- | The destination may be any FPU_REG, including one of the source FPU_REGs. |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_emu.h"
-#include "exception.h"
-#include "reg_constant.h"
-#include "fpu_system.h"
-
-
-/*
-  Multiply two registers to give a register result.
-  The sources are st(deststnr) and (b,tagb,signb).
-  The destination is st(deststnr).
-  */
-/* This routine must be called with non-empty source registers */
-int FPU_mul(FPU_REG const *b, u_char tagb, int deststnr, int control_w)
-{
-  FPU_REG *a = &st(deststnr);
-  FPU_REG *dest = a;
-  u_char taga = FPU_gettagi(deststnr);
-  u_char saved_sign = getsign(dest);
-  u_char sign = (getsign(a) ^ getsign(b));
-  int tag;
-
-
-  if ( !(taga | tagb) )
-    {
-      /* Both regs Valid, this should be the most common case. */
-
-      tag = FPU_u_mul(a, b, dest, control_w, sign, exponent(a) + exponent(b));
-      if ( tag < 0 )
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
-	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-
-  if ( taga == TAG_Special )
-    taga = FPU_Special(a);
-  if ( tagb == TAG_Special )
-    tagb = FPU_Special(b);
-
-  if ( ((taga == TAG_Valid) && (tagb == TW_Denormal))
-	    || ((taga == TW_Denormal) && (tagb == TAG_Valid))
-	    || ((taga == TW_Denormal) && (tagb == TW_Denormal)) )
-    {
-      FPU_REG x, y;
-      if ( denormal_operand() < 0 )
-	return FPU_Exception;
-
-      FPU_to_exp16(a, &x);
-      FPU_to_exp16(b, &y);
-      tag = FPU_u_mul(&x, &y, dest, control_w, sign,
-		      exponent16(&x) + exponent16(&y));
-      if ( tag < 0 )
-	{
-	  setsign(dest, saved_sign);
-	  return tag;
-	}
-      FPU_settagi(deststnr, tag);
-      return tag;
-    }
-  else if ( (taga <= TW_Denormal) && (tagb <= TW_Denormal) )
-    {
-      if ( ((tagb == TW_Denormal) || (taga == TW_Denormal))
-	   && (denormal_operand() < 0) )
-	return FPU_Exception;
-
-      /* Must have either both arguments == zero, or
-	 one valid and the other zero.
-	 The result is therefore zero. */
-      FPU_copy_to_regi(&CONST_Z, TAG_Zero, deststnr);
-      /* The 80486 book says that the answer is +0, but a real
-	 80486 behaves this way.
-	 IEEE-754 apparently says it should be this way. */
-      setsign(dest, sign);
-      return TAG_Zero;
-    }
-      /* Must have infinities, NaNs, etc */
-  else if ( (taga == TW_NaN) || (tagb == TW_NaN) )
-    {
-      return real_2op_NaN(b, tagb, deststnr, &st(0));
-    }
-  else if ( ((taga == TW_Infinity) && (tagb == TAG_Zero))
-	    || ((tagb == TW_Infinity) && (taga == TAG_Zero)) )
-    {
-      return arith_invalid(deststnr);  /* Zero*Infinity is invalid */
-    }
-  else if ( ((taga == TW_Denormal) || (tagb == TW_Denormal))
-	    && (denormal_operand() < 0) )
-    {
-      return FPU_Exception;
-    }
-  else if (taga == TW_Infinity)
-    {
-      FPU_copy_to_regi(a, TAG_Special, deststnr);
-      setsign(dest, sign);
-      return TAG_Special;
-    }
-  else if (tagb == TW_Infinity)
-    {
-      FPU_copy_to_regi(b, TAG_Special, deststnr);
-      setsign(dest, sign);
-      return TAG_Special;
-    }
-
-#ifdef PARANOID
-  else
-    {
-      INTERNAL(0x102);
-      return FPU_Exception;
-    }
-#endif /* PARANOID */
-
-}
--- a/bochs/fpu/reg_norm.c
+++ b/bochs/fpu/reg_norm.c
@ -1,46 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_norm.c                                                               |
- |  $Id: reg_norm.c,v 1.4 2003-08-01 09:32:33 sshwarts Exp $
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1995,1997,1999                               |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- | Normalize the value in a FPU_REG.                                         |
- |                                                                           |
- |                                                                           |
- |    Return value is the tag of the answer.                                 |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_emu.h"
-
-int FPU_normalize_nuo(FPU_REG *x, int bias)
-{
-  if (! (x->sigh & 0x80000000))
-    {
-      if (x->sigh == 0)
-	{
-	  if (x->sigl == 0)
-	    {
-	      x->exp = 0;
-	      return TAG_Zero;
-	    }
-	  x->sigh = x->sigl;
-	  x->sigl = 0;
-	  x->exp -= 32;
-	}
-      while (!(x->sigh & 0x80000000))
-	{
-	  x->sigh <<= 1;
-	  if (x->sigl & 0x80000000)
-	    x->sigh |= 1;
-	  x->sigl <<= 1;
-	  x->exp --;
-	}
-    }
-
-  x->exp += bias;
-
-  return TAG_Valid;
-}
--- a/bochs/fpu/reg_round.c
+++ b/bochs/fpu/reg_round.c
@ -1,547 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_round.c                                                              |
- |  $Id: reg_round.c,v 1.8 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Rounding/truncation/etc for FPU basic arithmetic functions.               |
- |                                                                           |
- | Copyright (C) 1993,1995,1997,1999                                         |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- | This code has four possible entry points.                                 |
- | The following must be entered by a jmp instruction:                       |
- |   fpu_reg_round, fpu_reg_round_sqrt, and fpu_Arith_exit.                  |
- |                                                                           |
- | The FPU_round entry point is intended to be used by C code.               |
- |                                                                           |
- |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
- |    one was raised, or -1 on internal error.                               |
- |                                                                           |
- | For correct "up" and "down" rounding, the argument must have the correct  |
- | sign.                                                                     |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- |                                                                           |
- | The significand and its extension are assumed to be exact in the          |
- | following sense:                                                          |
- |   If the significand by itself is the exact result then the significand   |
- |   extension (%edx) must contain 0, otherwise the significand extension    |
- |   must be non-zero.                                                       |
- |   If the significand extension is non-zero then the significand is        |
- |   smaller than the magnitude of the correct exact result by an amount     |
- |   greater than zero and less than one ls bit of the significand.          |
- |   The significand extension is only required to have three possible       |
- |   non-zero values:                                                        |
- |       less than 0x80000000  <=> the significand is less than 1/2 an ls    |
- |                                 bit smaller than the magnitude of the     |
- |                                 true exact result.                        |
- |         exactly 0x80000000  <=> the significand is exactly 1/2 an ls bit  |
- |                                 smaller than the magnitude of the true    |
- |                                 exact result.                             |
- |    greater than 0x80000000  <=> the significand is more than 1/2 an ls    |
- |                                 bit smaller than the magnitude of the     |
- |                                 true exact result.                        |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- |  The code in this module has become quite complex, but it should handle   |
- |  all of the FPU flags which are set at this stage of the basic arithmetic |
- |  computations.                                                            |
- |  There are a few rare cases where the results are not set identically to  |
- |  a real FPU. These require a bit more thought because at this stage the   |
- |  results of the code here appear to be more consistent...                 |
- |  This may be changed in a future version.                                 |
- +---------------------------------------------------------------------------*/
-
-
-#include "fpu_emu.h"
-#include "exception.h"
-#include "control_w.h"
-
-/* Flags for FPU_bits_lost */
-#define	LOST_DOWN	1
-#define	LOST_UP		2
-
-/* Flags for FPU_denormal */
-#define	DENORMAL	1
-#define	UNMASKED_UNDERFLOW 2
-
-
-static int round_up_64(FPU_REG *x)
-{
-  x->sigl ++;
-  if (x->sigl == 0)
-    {
-      x->sigh ++;
-      if (x->sigh == 0)
-	{
-	  x->sigh = 0x80000000;
-	  x->exp ++;
-	}
-    }
-  return LOST_UP;
-}
-
-static int truncate_64(FPU_REG *x)
-{
-  return LOST_DOWN;
-}
-
-static int round_up_53(FPU_REG *x)
-{
-  x->sigl &= 0xfffff800;
-  x->sigl += 0x800;
-  if (x->sigl == 0)
-    {
-      x->sigh ++;
-      if (x->sigh == 0)
-	{
-      x->sigh = 0x80000000;
-      x->exp ++;
-      /* if the above increment of a signed 16-bit value overflowed, substitute
-      the maximum positive exponent to force FPU_round to produce overflow */
-      if (x->exp == 0xFFFF8000) 
-          x->exp = 0x7FFF;
-	}
-    }
-  return LOST_UP;
-}
-
-static int truncate_53(FPU_REG *x)
-{
-  x->sigl &= 0xfffff800;
-  return LOST_DOWN;
-}
-
-static int round_up_24(FPU_REG *x)
-{
-  x->sigl = 0;
-  x->sigh &= 0xffffff00;
-  x->sigh += 0x100;
-  if (x->sigh == 0)
-    {
-      x->sigh = 0x80000000;
-      x->exp ++;
-      /* if the above increment of a signed 16-bit value overflowed, substitute
-      the maximum positive exponent to force FPU_round to produce overflow */
-      if (x->exp == 0xFFFF8000) 
-          x->exp = 0x7FFF;
-    }
-  return LOST_UP;
-}
-
-static int truncate_24(FPU_REG *x)
-{
-  x->sigl = 0;
-  x->sigh &= 0xffffff00;
-  return LOST_DOWN;
-}
-
-int FPU_round(FPU_REG *x, u32 extent, u16 control_w, u8 sign)
-{
-  u64 work;
-  u32 leading;
-  s16 expon = x->exp;
-  int FPU_bits_lost = 0, FPU_denormal, shift, tag, inexact = 0;
-
-  if (expon <= EXP_UNDER)
-    {
-      /* A denormal or zero */
-      if (control_w & CW_Underflow)
-	{
-	  /* Underflow is masked. */
-	  FPU_denormal = DENORMAL;
-	  shift = EXP_UNDER+1 - expon;
-	  if (shift >= 64)
-	    {
-	      if (shift == 64)
-		{
-		  x->exp += 64;
-		  if (extent | x->sigl)
-		    extent = x->sigh | 1;
-		  else
-		    extent = x->sigh;
-		}
-	      else
-		{
-		  x->exp = EXP_UNDER+1;
-		  extent = 1;
-		}
-	      significand(x) = 0;
-	    }
-	  else
-	    {
-	      x->exp += shift;
-	      if (shift >= 32)
-		{
-		  shift -= 32;
-		  if (shift)
-		    {
-		      extent |= x->sigl;
-		      work = significand(x) >> shift;
-		      if (extent)
-			extent = work | 1;
-		      else
-			extent = work;
-		      x->sigl = x->sigh >>= shift;
-		    }
-		  else
-		    {
-		      if (extent)
-			extent = x->sigl | 1;
-		      else
-			extent = x->sigl;
-		      x->sigl = x->sigh;
-		    }
-		  x->sigh = 0;
-		}
-	      else
-		{
-		  /* Shift by 1 to 32 places. */
-		  work = x->sigl;
-		  work <<= 32;
-		  work |= extent;
-		  work >>= shift;
-		  if (extent)
-		    extent = 1;
-		  extent |= work;
-		  significand(x) >>= shift;
-		}
-	    }
-	}
-      else
-	{
-	  /* Unmasked underflow. */
-	  FPU_denormal = UNMASKED_UNDERFLOW;
-	}
-    }
-  else
-    FPU_denormal = 0;
-
-  switch (control_w & CW_PC)
-    {
-    case 01:
-#ifndef PECULIAR_486
-      /* With the precision control bits set to 01 "(reserved)", a real 80486
-	 behaves as if the precision control bits were set to 11 "64 bits" */
-#ifdef PARANOID
-	INTERNAL(0x236);
-	return -1;
-#endif
-#endif
-	/* Fall through to the 64 bit case. */
-    case PR_64_BITS:
-      if (extent)
-	{
-	  switch (control_w & CW_RC)
-	    {
-	    case RC_RND:		/* Nearest or even */
-	      /* See if there is exactly half a ulp. */
-	      if (extent == 0x80000000)
-		{
-		  /* Round to even. */
-		  if (x->sigl & 0x1)
-		    /* Odd */
-		    FPU_bits_lost = round_up_64(x);
-		  else
-		    /* Even */
-		    FPU_bits_lost = truncate_64(x);
-		}
-	      else if (extent > 0x80000000)
-		{
-		  /* Greater than half */
-		  FPU_bits_lost = round_up_64(x);
-		}
-	      else
-		{
-		  /* Less than half */
-		  FPU_bits_lost = truncate_64(x);
-		}
-	      break;
-
-	    case RC_CHOP:		/* Truncate */
-	      FPU_bits_lost = truncate_64(x);
-	      break;
-	      
-	    case RC_UP:		/* Towards +infinity */
-	      if (sign == SIGN_POS)
-		{
-		  FPU_bits_lost = round_up_64(x);
-		}
-	      else
-		{
-		  FPU_bits_lost = truncate_64(x);
-		}
-	      break;
-
-	    case RC_DOWN:		/* Towards -infinity */
-	      if (sign != SIGN_POS)
-		{
-		  FPU_bits_lost = round_up_64(x);
-		}
-	      else
-		{
-		  FPU_bits_lost = truncate_64(x);
-		}
-	      break;
-
-	    default:
-	      INTERNAL(0x231);
-	      return -1;
-	    }
-	}
-      break;
-
-    case PR_53_BITS:
-      leading = x->sigl & 0x7ff;
-      if (extent || leading)
-	{
-	  switch (control_w & CW_RC)
-	    {
-	    case RC_RND:		/* Nearest or even */
-	      /* See if there is exactly half a ulp. */
-	      if (leading == 0x400)
-		{
-		  if (extent == 0)
-		    {
-		      /* Round to even. */
-		      if (x->sigl & 0x800)
-			/* Odd */
-			FPU_bits_lost = round_up_53(x);
-		      else
-			/* Even */
-			FPU_bits_lost = truncate_53(x);
-		    }
-		  else
-		    {
-		      /* Greater than half */
-		      FPU_bits_lost = round_up_53(x);
-		    }
-		}
-	      else if (leading > 0x400)
-		{
-		  /* Greater than half */
-		  FPU_bits_lost = round_up_53(x);
-		}
-	      else
-		{
-		  /* Less than half */
-		  FPU_bits_lost = truncate_53(x);
-		}
-	      break;
-
-	    case RC_CHOP:		/* Truncate */
-	      FPU_bits_lost = truncate_53(x);
-	      break;
-
-	    case RC_UP:		/* Towards +infinity */
-	      if (sign == SIGN_POS)
-		{
-		  FPU_bits_lost = round_up_53(x);
-		}
-	      else
-		{
-		  FPU_bits_lost = truncate_53(x);
-		}
-	      break;
-
-	    case RC_DOWN:		/* Towards -infinity */
-	      if (sign != SIGN_POS)
-		{
-		  FPU_bits_lost = round_up_53(x);
-		}
-	      else
-		{
-		  FPU_bits_lost = truncate_53(x);
-		}
-	      break;
-
-	    default:
-	      INTERNAL(0x231);
-	      return -1;
-	    }
-	}
-      inexact = FPU_bits_lost;
-      FPU_bits_lost = 0;
-      break;
-
-    case PR_24_BITS:
-      leading = x->sigh & 0xff;
-      if (leading || x->sigl || extent)
-	{
-	  switch (control_w & CW_RC)
-	    {
-	    case RC_RND:		/* Nearest or even */
-	      /* See if there is exactly half a ulp. */
-	      if (leading == 0x80)
-		{
-		  if ((x->sigl == 0) && (extent == 0))
-		    {
-		      /* Round to even. */
-		      if (x->sigh & 0x100)
-			/* Odd */
-			FPU_bits_lost = round_up_24(x);
-		      else
-			/* Even */
-			FPU_bits_lost = truncate_24(x);
-		    }
-		  else
-		    {
-		      /* Greater than half */
-		      FPU_bits_lost = round_up_24(x);
-		    }
-		}
-	      else if (leading > 0x80)
-		{
-		  /* Greater than half */
-		  FPU_bits_lost = round_up_24(x);
-		}
-	      else
-		{
-		  /* Less than half */
-		  FPU_bits_lost = truncate_24(x);
-		}
-	      break;
-
-	    case RC_CHOP:		/* Truncate */
-	      FPU_bits_lost = truncate_24(x);
-	      break;
-
-	    case RC_UP:		/* Towards +infinity */
-	      if (sign == SIGN_POS)
-		{
-		  FPU_bits_lost = round_up_24(x);
-		}
-	      else
-		{
-		  FPU_bits_lost = truncate_24(x);
-		}
-	      break;
-
-	    case RC_DOWN:		/* Towards -infinity */
-	      if (sign != SIGN_POS)
-		{
-		  FPU_bits_lost = round_up_24(x);
-		}
-	      else
-		{
-		  FPU_bits_lost = truncate_24(x);
-		}
-	      break;
-
-	    default:
-	      INTERNAL(0x231);
-	      return -1;
-	    }
-	}
-      inexact = FPU_bits_lost;
-      FPU_bits_lost = 0;
-      break;
-
-    default:
-#ifdef PARANOID
-	INTERNAL(0x230);
-	return -1;
-#endif
-      break;
-    }
-
-  tag = TAG_Valid;
-
-  if (FPU_denormal)
-    {
-      /* Undo the de-normalisation. */
-      if (FPU_denormal == UNMASKED_UNDERFLOW)
-	{
-	  if (x->exp <= EXP_UNDER)
-	    {
-	      /* Increase the exponent by the magic number */
-	      x->exp += 3 * (1 << 13);
-	      EXCEPTION(EX_Underflow);
-	    }
-	}
-      else
-	{
-	  if (x->exp != EXP_UNDER+1)
-	    {
-	      INTERNAL(0x234);
-	    }
-	  if ((x->sigh == 0) && (x->sigl == 0))
-	    {
-	      /* Underflow to zero */
-	      set_precision_flag_down();
-	      EXCEPTION(EX_Underflow);
-	      x->exp = EXP_UNDER;
-	      tag = TAG_Zero;
-	      FPU_bits_lost = 0;  /* Stop another call to
-				     set_precision_flag_down() */
-	    }
-	  else
-	    {
-	      if (x->sigh & 0x80000000)
-		{
-#ifdef PECULIAR_486
-	/*
-	 * This implements a special feature of 80486 behaviour.
-	 * Underflow will be signalled even if the number is
-	 * not a denormal after rounding.
-	 * This difference occurs only for masked underflow, and not
-	 * in the unmasked case.
-	 * Actual 80486 behaviour differs from this in some circumstances.
-	 */
-	      /* Will be masked underflow */
-#else
-	      /* No longer a denormal */
-#endif
-		}
-	      else
-#ifndef PECULIAR_486
-		{
-#endif
-		x->exp --;
-
-	      if (FPU_bits_lost  && (x->sigh & 0x80000000) == 0)
-		{
-		  /* There must be a masked underflow */
-		  EXCEPTION(EX_Underflow);
-		}
-
-              if (inexact && x->exp < -16382)
-                {
-                  /* There must be a masked underflow */
-                  EXCEPTION(EX_Underflow);
-                }
-        
-	      tag = TAG_Special;
-#ifndef PECULIAR_486
-		}
-#endif
-	    }
-	}
-    }
-
-
-  if (FPU_bits_lost == LOST_UP || inexact == LOST_UP)
-     set_precision_flag_up();
-       else   if (FPU_bits_lost == LOST_DOWN || inexact == LOST_DOWN)
-     set_precision_flag_down();
-
-  if (x->exp >= EXP_OVER)
-    {
-      x->exp += EXTENDED_Ebias;
-      tag = arith_round_overflow(x, sign);
-    }
-  else
-    {
-      x->exp += EXTENDED_Ebias;
-      x->exp &= 0x7fff;
-    }
-
-  if (sign != SIGN_POS)
-    x->exp |= 0x8000;
-
-  return tag;
-}
--- a/bochs/fpu/reg_u_add.c
+++ b/bochs/fpu/reg_u_add.c
@ -1,145 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_u_add.c                                                              |
- |  $Id: reg_u_add.c,v 1.7 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Add two valid (TAG_Valid) FPU_REG numbers, of the same sign, and put the  |
- |   result in a destination FPU_REG.                                        |
- |                                                                           |
- | Copyright (C) 1992,1993,1995,1997,1999                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@melbpc.org.au                             |
- |                                                                           |
- |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
- |    one was raised, or -1 on internal error.                               |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*
- |    Kernel addition routine FPU_u_add(reg *arg1, reg *arg2, reg *answ).
- |    Takes two valid reg f.p. numbers (TAG_Valid), which are
- |    treated as unsigned numbers,
- |    and returns their sum as a TAG_Valid or TAG_Special f.p. number.
- |    The returned number is normalized.
- |    Basic checks are performed if PARANOID is defined.
- */
-
-#include "exception.h"
-#include "fpu_emu.h"
-#include "control_w.h"
-
-
-int  FPU_u_add(const FPU_REG *arg1, const FPU_REG *arg2, FPU_REG *answ,
-	       u16 control_w, u_char sign, s32 expa, s32 expb)
-{
-  const FPU_REG *rtmp;
-  FPU_REG shifted;
-  u32 extent = 0;
-  int ediff = expa - expb, ed2, eflag, ovfl, carry;
-
-  if (ediff < 0)
-    {
-      ediff = -ediff;
-      rtmp = arg1;
-      arg1 = arg2;
-      arg2 = rtmp;
-      expa = expb;
-    }
-
-  /* Now we have exponent of arg1 >= exponent of arg2 */
-  
-  answ->exp = expa;
-
-#ifdef PARANOID
-  if (!(arg1->sigh & 0x80000000) || !(arg2->sigh & 0x80000000))
-    {
-      INTERNAL(0x201);
-      return -1;
-    }
-#endif
-
-  if (ediff == 0)
-    {
-      extent = 0;
-      shifted.sigl = arg2->sigl;
-      shifted.sigh = arg2->sigh;
-    }
-  else if (ediff < 32)
-    {
-      ed2 = 32 - ediff;
-      extent = arg2->sigl << ed2;
-      shifted.sigl = arg2->sigl >> ediff;
-      shifted.sigl |= (arg2->sigh << ed2);
-      shifted.sigh = arg2->sigh >> ediff;
-    }
-  else if (ediff < 64)
-    {
-      ediff -= 32;
-      if (! ediff)
-	{
-	  eflag = 0;
-	  extent = arg2->sigl;
-	  shifted.sigl = arg2->sigh;
-	}
-      else
-        {
-          u32 extent2;
-
-          ed2 = 32 - ediff;
-          eflag = arg2->sigl;
-          if (eflag)
-              extent |= 1;
-          extent2 = arg2->sigl >> ediff; 
-          extent2 |= (arg2->sigh << ed2);
-          extent |= extent2;
-          shifted.sigl = arg2->sigh >> ediff;
-        }
-      shifted.sigh = 0;
-    }
-  else
-    {
-      ediff -= 64;
-      if (! ediff)
-	{
-	  eflag = arg2->sigl;
-	  extent = arg2->sigh;
-	}
-      else
-	{
-	  eflag = arg2->sigl | arg2->sigh;
-          if (ediff < 32)
-             extent = arg2->sigh >> ediff;
-          else
-             extent = 0;
-	}
-      shifted.sigl = 0;
-      shifted.sigh = 0;
-      if (eflag)
-	extent |= 1;
-    }
-
-  answ->sigh = arg1->sigh + shifted.sigh;
-  ovfl = shifted.sigh > answ->sigh;
-  answ->sigl = arg1->sigl + shifted.sigl;
-  if (shifted.sigl > answ->sigl)
-    {
-      answ->sigh ++;
-      if (answ->sigh == 0)
-	ovfl = 1;
-    }
-  if (ovfl)
-    {
-      carry = extent & 1;
-      extent >>= 1;
-      extent |= carry;
-      if (answ->sigl & 1)
-	extent |= 0x80000000;
-      answ->sigl >>= 1;
-      if (answ->sigh & 1)
-	answ->sigl |= 0x80000000;
-      answ->sigh >>= 1;
-      answ->sigh |= 0x80000000;
-      answ->exp ++;
-    }
-
-  return FPU_round(answ, extent, control_w, sign);
-}
--- a/bochs/fpu/reg_u_div.c
+++ b/bochs/fpu/reg_u_div.c
@ -1,278 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_u_div.c                                                              |
- |  $Id: reg_u_div.c,v 1.7 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Divide one FPU_REG by another and put the result in a destination FPU_REG.|
- |                                                                           |
- | Copyright (C) 1992,1993,1995,1997,1999                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@melbpc.org.au                             |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- |                                                                           |
- |  Does not compute the destination exponent, but does adjust it.           |
- |                                                                           |
- |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
- |    one was raised, or -1 on internal error.                               |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "fpu_emu.h"
-#include "control_w.h"
-
-
-int FPU_u_div(const FPU_REG *a, const FPU_REG *b, FPU_REG *dest,
-	      u16 control_w, u8 sign)
-{
-  s32 exp;
-  u32 divr32, rem, rat1, rat2, work32, accum3, prodh;
-  u64 work64, divr64, prod64, accum64;
-  u8 ovfl;
-
-  exp = (s32)a->exp - (s32)b->exp;
-
-  /* if the above subtraction of signed 16-bit values overflowed, substitute
-         the maximum positive exponent to force FPU_round to produce overflow */
-  if (exp > 0x7FFF) 
-    exp = 0x7FFF;
-
-  if (exp < EXP_WAY_UNDER)
-    exp = EXP_WAY_UNDER;
-
-  dest->exp = exp;
-#ifdef PARANOID
-  if (!(b->sigh & 0x80000000))
-    {
-      INTERNAL(0x202);
-    }
-#endif
-
-  work64 = significand(a);
-
-  /* We can save a lot of time if the divisor has all its lowest
-     32 bits equal to zero. */
-  if (b->sigl == 0)
-    {
-      divr32 = b->sigh;
-      ovfl = a->sigh >= divr32;
-      rat1 = work64 / divr32;
-      rem = work64 % divr32;
-      work64 = rem;
-      work64 <<= 32;
-      rat2 = work64 / divr32;
-      rem = work64 % divr32;
-
-      work64 = rem;
-      work64 <<= 32;
-      rem = work64 / divr32;
-
-      if (ovfl)
-	{
-	  rem >>= 1;
-	  if (rat2 & 1)
-	    rem |= 0x80000000;
-	  rat2 >>= 1;
-	  if (rat1 & 1)
-	    rat2 |= 0x80000000;
-	  rat1 >>= 1;
-	  rat1 |= 0x80000000;
-	  dest->exp ++;
-	}
-      dest->sigh = rat1;
-      dest->sigl = rat2;
-
-      dest->exp --;
-      return FPU_round(dest, rem, control_w, sign);
-    }
-
-  /* This may take a little time... */
-  accum64 = work64;
-  divr64 = significand(b);
-
-  if ((ovfl = accum64 >= divr64))
-    accum64 -= divr64;
-  divr32 = b->sigh+1;
-
-  if (divr32 != 0)
-    {
-      rat1 = accum64 / divr32;
-    }
-  else
-    rat1 = accum64 >> 32;
-  prod64 = rat1 * (u64)b->sigh;
-
-  accum64 -= prod64;
-  prod64 = rat1 * (u64)b->sigl;
-  accum3 = prod64;
-  if (accum3)
-    {
-      accum3 = -accum3;
-      accum64 --;
-    }
-  prodh = prod64 >> 32;
-  accum64 -= prodh;
-
-  work32 = accum64 >> 32;
-  if (work32)
-    {
-#ifdef PARANOID
-      if (work32 != 1)
-	{
-	  INTERNAL(0x203);
-	}
-#endif
-
-      /* Need to subtract the divisor once more. */
-      work32 = accum3;
-      accum3 = work32 - b->sigl;
-      if (accum3 > work32)
-	accum64 --;
-      rat1 ++;
-      accum64 -= b->sigh;
-
-#ifdef PARANOID
-      if ((accum64 >> 32))
-	{
-	  INTERNAL(0x203);
-	}
-#endif
-    }
-
-  /* Now we essentially repeat what we have just done, but shifted
-     32 bits. */
-
-  accum64 <<= 32;
-  accum64 |= accum3;
-  if (accum64 >= divr64)
-    {
-      accum64 -= divr64;
-      rat1 ++;
-    }
-  if (divr32 != 0)
-    {
-      rat2 = accum64 / divr32;
-    }
-  else
-    rat2 = accum64 >> 32;
-  prod64 = rat2 * (u64)b->sigh;
-
-  accum64 -= prod64;
-  prod64 = rat2 * (u64)b->sigl;
-  accum3 = prod64;
-  if (accum3)
-    {
-      accum3 = -accum3;
-      accum64 --;
-    }
-  prodh = prod64 >> 32;
-  accum64 -= prodh;
-
-  work32 = accum64 >> 32;
-  if (work32)
-    {
-#ifdef PARANOID
-      if (work32 != 1)
-	{
-	  INTERNAL(0x203);
-	}
-#endif
-
-      /* Need to subtract the divisor once more. */
-      work32 = accum3;
-      accum3 = work32 - b->sigl;
-      if (accum3 > work32)
-	accum64 --;
-      rat2 ++;
-      if (rat2 == 0)
-	rat1 ++;
-      accum64 -= b->sigh;
-
-#ifdef PARANOID
-      if ((accum64 >> 32))
-	{
-	  INTERNAL(0x203);
-	}
-#endif
-    }
-
-  /* Tidy up the remainder */
-
-  accum64 <<= 32;
-  accum64 |= accum3;
-  if (accum64 >= divr64)
-    {
-      accum64 -= divr64;
-      rat2 ++;
-      if (rat2 == 0)
-	{
-	  rat1 ++;
-#ifdef PARANOID
-	  /* No overflow should be possible here */
-	  if (rat1 == 0)
-	    {
-	      INTERNAL(0x203);
-	    }
-	}
-#endif
-    }
-
-  /* The basic division is done, now we must be careful with the
-     remainder. */
-
-  if (ovfl)
-    {
-      if (rat2 & 1)
-	rem = 0x80000000;
-      else
-	rem = 0;
-      rat2 >>= 1;
-      if (rat1 & 1)
-	rat2 |= 0x80000000;
-      rat1 >>= 1;
-      rat1 |= 0x80000000;
-
-      if (accum64)
-	rem |= 0xff0000;
-
-      dest->exp ++;
-    }
-  else
-    {
-      /* Now we just need to know how large the remainder is
-	 relative to half the divisor. */
-      if (accum64 == 0)
-	rem = 0;
-      else
-	{
-	  accum3 = accum64 >> 32;
-	  if (accum3 & 0x80000000)
-	    {
-	      /* The remainder is definitely larger than 1/2 divisor. */
-	      rem = 0xff000000;
-	    }
-	  else
-	    {
-	      accum64 <<= 1;
-	      if (accum64 >= divr64)
-		{
-		  accum64 -= divr64;
-		  if (accum64 == 0)
-		    rem = 0x80000000;
-		  else
-		    rem = 0xff000000;
-		}
-	      else
-		rem = 0x7f000000;
-	    }
-	}
-    }
-
-  dest->sigh = rat1;
-  dest->sigl = rat2;
-
-  dest->exp --;
-  return FPU_round(dest, rem, control_w, sign);
-}
--- a/bochs/fpu/reg_u_mul.c
+++ b/bochs/fpu/reg_u_mul.c
@ -1,100 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_u_mul.c                                                              |
- |  $Id: reg_u_mul.c,v 1.7 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Core multiplication routine                                               |
- |                                                                           |
- | Copyright (C) 1992,1993,1995,1997,1999                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@melbpc.org.au                             |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- |   Basic multiplication routine.                                           |
- |   Does not check the resulting exponent for overflow/underflow            |
- |                                                                           |
- |   Internal working is at approx 128 bits.                                 |
- |   Result is rounded to nearest 53 or 64 bits, using "nearest or even".    |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "fpu_emu.h"
-#include "control_w.h"
-
-
-int FPU_u_mul(const FPU_REG *a, const FPU_REG *b, FPU_REG *c, u16 cw,
-	      u_char sign, int expon)
-{
-  u64 mu, ml, mi;
-  u32 lh, ll, th, tl;
-
-#ifdef PARANOID
-  if (! (a->sigh & 0x80000000) || ! (b->sigh & 0x80000000))
-    {
-      INTERNAL(0x205);
-    }
-#endif
-
-  ml = a->sigl;
-  ml *= b->sigl;
-  ll = ml;
-  lh = ml >> 32;
-
-  mu = a->sigh;
-  mu *= b->sigh;
-
-  mi = a->sigh;
-  mi *= b->sigl;
-  tl = mi;
-  th = mi >> 32;
-  lh += tl;
-  if (tl > lh)
-    mu ++;
-  mu += th;
-
-  mi = a->sigl;
-  mi *= b->sigh;
-  tl = mi;
-  th = mi >> 32;
-  lh += tl;
-  if (tl > lh)
-    mu ++;
-  mu += th;
-
-  ml = lh;
-  ml <<= 32;
-  ml += ll;
-
-  expon -= EXP_BIAS-1;
-  if (expon <= EXP_WAY_UNDER)
-    expon = EXP_WAY_UNDER;
-    
-  /* if the addition of signed 16-bit values overflowed, substitute
-    the maximum positive exponent to force FPU_round to produce overflow */
-  if (expon > 0x7FFE)
-      expon = 0x7FFE;
-
-  c->exp = expon;
-
-  if (! (mu & BX_CONST64(0x8000000000000000)))
-    {
-      mu <<= 1;
-      if (ml & BX_CONST64(0x8000000000000000))
-	mu |= 1;
-      ml <<= 1;
-      c->exp --;
-    }
-
-  ll = ml;
-  lh = ml >> 32;
-
-  if (ll)
-    lh |= 1;
-
-  c->sigl = mu;
-  c->sigh = mu >> 32;
-
-  return FPU_round(c, lh, cw, sign);
-}
--- a/bochs/fpu/reg_u_sub.c
+++ b/bochs/fpu/reg_u_sub.c
@ -1,224 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  reg_u_sub.c                                                              |
- |  $Id: reg_u_sub.c,v 1.7 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Core floating point subtraction routine.                                  |
- |                                                                           |
- | Copyright (C) 1992,1993,1995,1997,1999                                    |
- |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
- |                  E-mail   billm@melbpc.org.au                             |
- |                                                                           |
- |    Return value is the tag of the answer, or-ed with FPU_Exception if     |
- |    one was raised, or -1 on internal error.                               |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*
- |    Kernel subtraction routine FPU_u_sub(reg *arg1, reg *arg2, reg *answ).
- |    Takes two valid reg f.p. numbers (TAG_Valid), which are
- |    treated as unsigned numbers,
- |    and returns their difference as a TAG_Valid or TAG_Zero f.p.
- |    number.
- |    The first number (arg1) must be the larger.
- |    The returned number is normalized.
- |    Basic checks are performed if PARANOID is defined.
- */
-
-#include "exception.h"
-#include "fpu_emu.h"
-#include "control_w.h"
-
-
-int  FPU_u_sub(const FPU_REG *arg1, const FPU_REG *arg2, FPU_REG *dest,
-	       u16 control_w, u_char sign, int expa, int expb)
-{
-  FPU_REG shifted, answ;
-  u32 extent;
-  int ediff = expa - expb, ed2, borrow;
-
-#ifdef PARANOID
-  if (ediff < 0)
-    {
-      INTERNAL(0x206);
-      return -1;
-    }
-#endif
-  
-  answ.exp = expa;
-
-#ifdef PARANOID
-  if (!(arg1->sigh & 0x80000000) || !(arg2->sigh & 0x80000000))
-    {
-      INTERNAL(0x209);
-      return -1;
-    }
-#endif
-
-  if (ediff == 0)
-    {
-      shifted.sigl = arg2->sigl;
-      shifted.sigh = arg2->sigh;
-      extent = 0;
-    }
-  else if (ediff < 32)
-    {
-      ed2 = 32 - ediff;
-      extent = arg2->sigl << ed2;
-      shifted.sigl = arg2->sigl >> ediff;
-      shifted.sigl |= (arg2->sigh << ed2);
-      shifted.sigh = arg2->sigh >> ediff;
-    }
-  else if (ediff < 64)
-    {
-      ediff -= 32;
-      if (! ediff)
-	{
-	  extent = arg2->sigl;
-	  shifted.sigl = arg2->sigh;
-	  shifted.sigh = 0;
-	}
-      else
-	{
-	  ed2 = 32 - ediff;
-	  extent = arg2->sigl >> ediff;
-	  extent |= (arg2->sigh << ed2);
-	  if (arg2->sigl << ed2)
-	    extent |= 1;
-	  shifted.sigl = arg2->sigh >> ediff;
-	  shifted.sigh = 0;
-	}
-    }
-  else
-    {
-      ediff -= 64;
-      if (! ediff)
-	{
-	  extent = arg2->sigh;
-	  if (arg2->sigl)
-	    extent |= 1;
-	  shifted.sigl = 0;
-	  shifted.sigh = 0;
-	}
-      else
-	{
-	  if (ediff < 32)
-	    {
-	      extent = arg2->sigh >> ediff;
-	      if (arg2->sigl || (arg2->sigh << (32-ediff)))
-		extent |= 1;
-	    }
-	  else
-	    extent = 1;
-	  shifted.sigl = 0;
-	  shifted.sigh = 0;
-	}
-    }
-
-  extent = -extent;
-  borrow = extent;
-  answ.sigl = arg1->sigl - shifted.sigl;
-  if (answ.sigl > arg1->sigl)
-    {
-      if (borrow)
-	answ.sigl --;
-      borrow = 1;
-    }
-  else if (borrow)
-    {
-      answ.sigl --;
-      if (answ.sigl != 0xffffffff)
-	borrow = 0;
-    }
-  answ.sigh = arg1->sigh - shifted.sigh;
-  if (answ.sigh > arg1->sigh)
-    {
-      if (borrow)
-	answ.sigh --;
-      borrow = 1;
-    }
-  else if (borrow)
-    {
-      answ.sigh --;
-      if (answ.sigh != 0xffffffff)
-	borrow = 0;
-    }
-
-#ifdef PARANOID
-  if (borrow)
-    {
-      /* This can only occur if the code is bugged */
-      INTERNAL(0x212);
-      return -1;
-    }
-#endif
-
-  if (answ.sigh & 0x80000000)
-    {
-      /*
-	The simpler "*dest = answ" is broken in gcc
-      */
-      dest->exp = answ.exp;
-      dest->sigh = answ.sigh;
-      dest->sigl = answ.sigl;
-      return FPU_round(dest, extent, control_w, sign);
-    }
-
-  if (answ.sigh == 0)
-    {
-      if (answ.sigl)
-	{
-	  answ.sigh = answ.sigl;
-	  answ.sigl = extent;
-	  extent = 0;
-	  answ.exp -= 32;
-	}
-      else if (extent)
-	{
-/*
- *	 A rare case, the only one which is non-zero if we got here
- *         is:           1000000 .... 0000
- *                      -0111111 .... 1111 1
- *                       -------------------- 
- *                       0000000 .... 0000 1 
- */
-	  if (extent != 0x80000000)
-	    {
-	      /* This can only occur if the code is bugged */
-	      INTERNAL(0x210);
-	      return -1;
-	    }
-	  dest->sigh = extent;
-	  dest->sigl = extent = 0;
-     dest->exp &= 0x7FFF;
-     dest->exp -= 64;
-     if (arg1->sigh == 0x80000000 && arg2->sigh == 0x80000000)
-         dest->exp++;
-     dest->exp -= EXTENDED_Ebias;
-     return FPU_round(dest, extent, control_w, sign);
-	}
-      else
-	{
-	  dest->exp = 0;
-	  dest->sigh = dest->sigl = 0;
-	  return TAG_Zero;
-	}
-    }
-
-  while (!(answ.sigh & 0x80000000))
-    {
-      answ.sigh <<= 1;
-      if (answ.sigl & 0x80000000)
-	answ.sigh |= 1;
-      answ.sigl <<= 1;
-      if (extent & 0x80000000)
-	answ.sigl |= 1;
-      extent <<= 1;
-      answ.exp --;
-    }
-
-  dest->exp = answ.exp;
-  dest->sigh = answ.sigh;
-  dest->sigl = answ.sigl;
-
-  return FPU_round(dest, extent, control_w, sign);
-}
--- a/bochs/fpu/round_Xsig.c
+++ b/bochs/fpu/round_Xsig.c
@ -1,96 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  round_Xsig.c                                                             |
- |  $Id: round_Xsig.c,v 1.2 2001-10-06 03:53:46 bdenney Exp $
- |                                                                           |
- | Copyright (C) 1992,1993,1994,1995,1999                                    |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- | Normalize and round a 12 byte quantity.                                   |
- |   int round_Xsig(Xsig *n)                                                 |
- |                                                                           |
- | Normalize a 12 byte quantity.                                             |
- |   int norm_Xsig(Xsig *n)                                                  |
- |                                                                           |
- | Each function returns the size of the shift (nr of bits).                 |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_emu.h"
-#include "poly.h"
-
-int round_Xsig(Xsig *x)
-{
-  int n = 0;
-
-  if ( x->msw == 0 )
-    {
-      x->msw = x->midw;
-      x->midw = x->lsw;
-      x->lsw = 0;
-      n = 32;
-    }
-  while ( !(x->msw & 0x80000000) )
-    {
-      x->msw <<= 1;
-      if ( x->midw & 0x80000000 ) x->msw |= 1;
-      x->midw <<= 1;
-      if ( x->lsw & 0x80000000 ) x->midw |= 1;
-      x->lsw <<= 1;
-      n++;
-    }
-  if ( x->lsw & 0x80000000 )
-    {
-      x->midw ++;
-      if ( x->midw == 0 )
-	x->msw ++;
-      if ( x->msw == 0 )
-	{
-	  x->msw = 0x80000000;
-	  n--;
-	}
-    }
-  
-
-  return -n;
-}
-
-
-int norm_Xsig(Xsig *x)
-{
-  int n = 0;
-
-  if ( x->msw == 0 )
-    {
-      if ( x->midw == 0 )
-	{
-	  x->msw = x->lsw;
-	  x->midw = 0;
-	  x->lsw = 0;
-	  n = 64;
-	}
-      else
-	{
-	  x->msw = x->midw;
-	  x->midw = x->lsw;
-	  x->lsw = 0;
-	  n = 32;
-	}
-    }
-  while ( !(x->msw & 0x80000000) )
-    {
-      x->msw <<= 1;
-      if ( x->midw & 0x80000000 ) x->msw |= 1;
-      x->midw <<= 1;
-      if ( x->lsw & 0x80000000 ) x->midw |= 1;
-      x->lsw <<= 1;
-      n++;
-    }
-
-  return -n;
-}
-
-
-
-
-
--- a/bochs/fpu/shr_Xsig.c
+++ b/bochs/fpu/shr_Xsig.c
@ -1,42 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  shr_Xsig.S                                                               |
- |  $Id: shr_Xsig.c,v 1.2 2001-10-06 03:53:46 bdenney Exp $
- |                                                                           |
- | 12 byte right shift function                                              |
- |                                                                           |
- | Copyright (C) 1992,1994,1995                                              |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
- |                                                                           |
- |                                                                           |
- |   Extended shift right function.                                          |
- |   Fastest for small shifts.                                               |
- |   Shifts the 12 byte quantity pointed to by the first arg (arg)           |
- |   right by the number of bits specified by the second arg (nr).           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-#include "fpu_emu.h"
-#include "poly.h"
-
-void shr_Xsig(Xsig *arg, const int nr)
-{
-  int n = nr;
-
-  while ( n >= 32 )
-    {
-      arg->lsw = arg->midw;
-      arg->midw = arg->msw;
-      arg->msw = 0;
-      n -= 32;
-    }
-
-  if ( n <= 0 )
-    return;
-
-  arg->lsw = (arg->lsw >> n) | (arg->midw << (32-n));
-  arg->midw = (arg->midw >> n) | (arg->msw << (32-n));
-  arg->msw >>= n;
-
-}
-
--- a/bochs/fpu/softfloat-macros.h
+++ b/bochs/fpu/softfloat-macros.h
@ -32,6 +32,9 @@ these four paragraphs for those parts of this code that are retained.
 *            Stanislav Shwartsman (gate at fidonet.org.il)
 * ==========================================================================*/ 

+#ifndef _SOFTFLOAT_MACROS_H_
+#define _SOFTFLOAT_MACROS_H_
+
 /*----------------------------------------------------------------------------
 | Shifts `a' right by the number of bits given in `count'.  If any nonzero
 | bits are shifted off, they are ``jammed'' into the least significant bit of
@ -41,7 +44,7 @@ these four paragraphs for those parts of this code that are retained.
 | The result is stored in the location pointed to by `zPtr'.
 *----------------------------------------------------------------------------*/

-BX_CPP_INLINE void shift32RightJamming(Bit32u a, Bit16s count, Bit32u *zPtr)
+BX_CPP_INLINE void shift32RightJamming(Bit32u a, int count, Bit32u *zPtr)
 {
    Bit32u z;

@ -66,20 +69,17 @@ BX_CPP_INLINE void shift32RightJamming(Bit32u a, Bit16s count, Bit32u *zPtr)
 | The result is stored in the location pointed to by `zPtr'.
 *----------------------------------------------------------------------------*/

-BX_CPP_INLINE void shift64RightJamming(Bit64u a, Bit16s count, Bit64u *zPtr)
+BX_CPP_INLINE void shift64RightJamming(Bit64u a, int count, Bit64u *zPtr)
 {
-    Bit64u z;
-
    if (count == 0) {
-        z = a;
+        *zPtr = a;
    }
    else if (count < 64) {
-        z = (a>>count) | ((a<<((-count) & 63)) != 0);
+        *zPtr = (a>>count) | ((a<<((-count) & 63)) != 0);
    }
    else {
-        z = (a != 0);
+        *zPtr = (a != 0);
    }
-    *zPtr = z;
 }

 /*----------------------------------------------------------------------------
@ -101,10 +101,10 @@ BX_CPP_INLINE void shift64RightJamming(Bit64u a, Bit16s count, Bit64u *zPtr)

 BX_CPP_INLINE void
 shift64ExtraRightJamming(
-     Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+     Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 {
    Bit64u z0, z1;
-    Bit8s negCount = (-count) & 63;
+    int negCount = (-count) & 63;

    if (count == 0) {
        z1 = a1;
@ -137,9 +137,7 @@ BX_CPP_INLINE void
 BX_CPP_INLINE void
 add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
 {
-    Bit64u z1;
-
-    z1 = a1 + b1;
+    Bit64u z1 = a1 + b1;
    *z1Ptr = z1;
    *z0Ptr = a0 + b0 + (z1 < a1);
 }
@ -237,10 +235,9 @@ static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
    };
-    Bit8s index;
    Bit32u z;

-    index = (a>>27) & 15;
+    int index = (a>>27) & 15;
    if (aExp & 1) {
        z = 0x4000 + (a>>17) - sqrtOddAdjustments[index];
        z = ((a / z)<<14) + (z<<15);
@ -262,7 +259,7 @@ static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)

 static int countLeadingZeros32(Bit32u a)
 {
-    static const Bit8s countLeadingZerosHigh[] = {
+    static const int countLeadingZerosHigh[] = {
        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@ -280,9 +277,7 @@ static int countLeadingZeros32(Bit32u a)
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
-    Bit8s shiftCount;
-
-    shiftCount = 0;
+    int shiftCount = 0;
    if (a < 0x10000) {
        shiftCount += 16;
        a <<= 16;
@ -300,11 +295,9 @@ static int countLeadingZeros32(Bit32u a)
 | `a'.  If `a' is zero, 64 is returned.
 *----------------------------------------------------------------------------*/

-static int countLeadingZeros64(Bit64u a)
+BX_CPP_INLINE int countLeadingZeros64(Bit64u a)
 {
-    Bit8s shiftCount;
-
-    shiftCount = 0;
+    int shiftCount = 0;
    if (a < ((Bit64u) 1)<<32) {
        shiftCount += 32;
    }
@ -326,10 +319,10 @@ static int countLeadingZeros64(Bit64u a)
 *----------------------------------------------------------------------------*/

 BX_CPP_INLINE void
- shift128Right(Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+ shift128Right(Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 {
    Bit64u z0, z1;
-    Bit8s negCount = (-count) & 63;
+    int negCount = (-count) & 63;

    if (count == 0) {
        z1 = a1;
@ -360,10 +353,10 @@ BX_CPP_INLINE void

 BX_CPP_INLINE void
 shift128RightJamming(
-     Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+     Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 {
    Bit64u z0, z1;
-    Bit8s negCount = (- count) & 63;
+    int negCount = (-count) & 63;

    if (count == 0) {
        z1 = a1;
@ -389,75 +382,6 @@ BX_CPP_INLINE void
    *z0Ptr = z0;
 }

-/*----------------------------------------------------------------------------
-| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
-| by 64 _plus_ the number of bits given in `count'.  The shifted result is
-| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
-| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
-| off form a third 64-bit result as follows:  The _last_ bit shifted off is
-| the most-significant bit of the extra result, and the other 63 bits of the
-| extra result are all zero if and only if _all_but_the_last_ bits shifted off
-| were all zero.  This extra result is stored in the location pointed to by
-| `z2Ptr'.  The value of `count' can be arbitrarily large.
-|     (This routine makes more sense if `a0', `a1', and `a2' are considered
-| to form a fixed-point value with binary point between `a1' and `a2'.  This
-| fixed-point value is shifted right by the number of bits given in `count',
-| and the integer part of the result is returned at the locations pointed to
-| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
-| corrupted as described above, and is returned at the location pointed to by
-| `z2Ptr'.)
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE void
- shift128ExtraRightJamming(
-     Bit64u a0,
-     Bit64u a1,
-     Bit64u a2,
-     Bit16s count,
-     Bit64u *z0Ptr,
-     Bit64u *z1Ptr,
-     Bit64u *z2Ptr
-)
-{
-    Bit64u z0, z1, z2;
-    Bit8s negCount = (-count) & 63;
-
-    if (count == 0) {
-        z2 = a2;
-        z1 = a1;
-        z0 = a0;
-    }
-    else {
-        if (count < 64) {
-            z2 = a1<<negCount;
-            z1 = (a0<<negCount) | (a1>>count);
-            z0 = a0>>count;
-        }
-        else {
-            if (count == 64) {
-                z2 = a1;
-                z1 = a0;
-            }
-            else {
-                a2 |= a1;
-                if (count < 128) {
-                    z2 = a0<<negCount;
-                    z1 = a0>>(count & 63);
-                }
-                else {
-                    z2 = (count == 128) ? a0 : (a0 != 0);
-                    z1 = 0;
-                }
-            }
-            z0 = 0;
-        }
-        z2 |= (a2 != 0);
-    }
-    *z2Ptr = z2;
-    *z1Ptr = z1;
-    *z0Ptr = z0;
-}
-
 /*----------------------------------------------------------------------------
 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
 | number of bits given in `count'.  Any bits shifted off are lost.  The value
@ -467,7 +391,7 @@ BX_CPP_INLINE void

 BX_CPP_INLINE void
 shortShift128Left(
-     Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+     Bit64u a0, Bit64u a1, int count, Bit64u *z0Ptr, Bit64u *z1Ptr)
 {
    *z1Ptr = a1<<count;
    *z0Ptr = (count == 0) ? a0 : (a0<<count) | (a1>>((-count) & 63));
@ -494,7 +418,7 @@ BX_CPP_INLINE void add192(
 )
 {
    Bit64u z0, z1, z2;
-    Bit8s carry0, carry1;
+    int carry0, carry1;

    z2 = a2 + b2;
    carry1 = (z2 < a2);
@ -530,7 +454,7 @@ BX_CPP_INLINE void sub192(
 )
 {
    Bit64u z0, z1, z2;
-    Bit8s borrow0, borrow1;
+    int borrow0, borrow1;

    z2 = a2 - b2;
    borrow1 = (a2 < b2);
@ -578,4 +502,140 @@ BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
    return (a0 < b0) || ((a0 == b0) && (a1 < b1));
 }

+#endif	/* FLOATX80 */
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void mul128By64To192(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u b,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr
+)
+{
+    Bit64u z0, z1, z2, more1;
+
+    mul64To128(a1, b, &z1, &z2);
+    mul64To128(a0, b, &z0, &more1);
+    add128(z0, more1, 0, z1, &z0, &z1);
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void mul128To256(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u b0,
+     Bit64u b1,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr,
+     Bit64u *z3Ptr
+)
+{
+    Bit64u z0, z1, z2, z3;
+    Bit64u more1, more2;
+
+    mul64To128(a1, b1, &z2, &z3);
+    mul64To128(a1, b0, &z1, &more2);
+    add128(z1, more2, 0, z2, &z1, &z2);
+    mul64To128(a0, b0, &z0, &more1);
+    add128(z0, more1, 0, z1, &z0, &z1);
+    mul64To128(a0, b1, &more1, &more2);
+    add128(more1, more2, 0, z2, &more1, &z2);
+    add128(z0, z1, 0, more1, &z0, &z1);
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shift128ExtraRightJamming(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u a2,
+     int count,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr
+)
+{
+    Bit64u z0, z1, z2;
+    int negCount = (-count) & 63;
+
+    if (count == 0) {
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if (count < 64) {
+            z2 = a1<<negCount;
+            z1 = (a0<<negCount) | (a1>>count);
+            z0 = a0>>count;
+        }
+        else {
+            if (count == 64) {
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;
+                if (count < 128) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>(count & 63);
+                }
+                else {
+                    z2 = (count == 128) ? a0 : (a0 != 0);
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= (a2 != 0);
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+#endif  /* FLOAT128 */
+
 #endif
--- a/bochs/fpu/softfloat-round-pack.cc
+++ b/bochs/fpu/softfloat-round-pack.cc
@ -0,0 +1,675 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloat.h"
+#include "softfloat-round-pack.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status)
+{
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    int roundIncrement = 0x40;
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = 0x7F;
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    int roundBits = absZ & 0x7F;
+    absZ = (absZ + roundIncrement)>>7;
+    absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
+    Bit32s z = absZ;
+    if (zSign) z = -z;
+    if ((absZ>>32) || (z && ((z < 0) ^ zSign))) {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status)
+{
+    Bit64s z;
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    int increment = ((Bit64s) absZ1 < 0);
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) increment = 0;
+        else {
+            if (zSign) {
+                increment = (roundingMode == float_round_down) && absZ1;
+            }
+            else {
+                increment = (roundingMode == float_round_up) && absZ1;
+            }
+        }
+    }
+    if (increment) {
+        ++absZ0;
+        if (absZ0 == 0) goto overflow;
+        absZ0 &= ~(((Bit64u) (absZ1<<1) == 0) & roundNearestEven);
+    }
+    z = absZ0;
+    if (zSign) z = -z;
+    if (z && ((z < 0) ^ zSign)) {
+ overflow:
+        float_raise(status, float_flag_invalid);
+        return (Bit64s)(int64_indefinite);
+    }
+    if (absZ1) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
+{
+    int shiftCount = countLeadingZeros32(aSig) - 8;
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
+{
+    Bit32s roundIncrement, roundBits, roundMask;
+
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 0x40;
+    roundMask = 0x7F;
+
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask;
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & roundMask;
+    if (0xFD <= (Bit16u) zExp) {
+        if ((0xFD < zExp)
+             || ((zExp == 0xFD)
+                  && ((Bit32s) (zSig + roundIncrement) < 0)))
+        {
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
+            shift32RightJamming(zSig, -zExp, &zSig);
+            zExp = 0;
+            roundBits = zSig & roundMask;
+            if (isTiny && roundBits) {
+                float_raise(status, float_flag_underflow);
+                if(get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_inexact);
+                    return packFloat32(zSign, 0, 0);
+                }
+            }
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    zSig = ((zSig + roundIncrement) & ~roundMask) >> 7;
+    zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
+    if (zSig == 0) zExp = 0;
+    return packFloat32(zSign, zExp, zSig);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
+{
+    int shiftCount = countLeadingZeros32(zSig) - 1;
+    return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
+{
+    int shiftCount = countLeadingZeros64(aSig) - 11;
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
+{
+    Bit16s roundIncrement, roundBits;
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 0x200;
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = 0x3FF;
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & 0x3FF;
+    if (0x7FD <= (Bit16u) zExp) {
+        if ((0x7FD < zExp)
+             || ((zExp == 0x7FD)
+                  && ((Bit64s) (zSig + roundIncrement) < 0)))
+        {
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0);
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < BX_CONST64(0x8000000000000000));
+            shift64RightJamming(zSig, -zExp, &zSig);
+            zExp = 0;
+            roundBits = zSig & 0x3FF;
+            if (isTiny && roundBits) {
+                float_raise(status, float_flag_underflow);
+                if(get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_inexact);
+                    return packFloat64(zSign, 0, 0);
+                }
+            }
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    zSig = (zSig + roundIncrement)>>10;
+    zSig &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);
+    if (zSig == 0) zExp = 0;
+    return packFloat64(zSign, zExp, zSig);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
+{
+    int shiftCount = countLeadingZeros64(zSig) - 1;
+    return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount, status);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
+{
+    int shiftCount = countLeadingZeros64(aSig);
+    *zSigPtr = aSig<<shiftCount;
+    *zExpPtr = 1 - shiftCount;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 roundAndPackFloatx80(int roundingPrecision, 
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
+{
+    Bit64u roundIncrement, roundMask, roundBits;
+    int increment;
+
+    Bit8u roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    if (roundingPrecision == 64) {
+        roundIncrement = BX_CONST64(0x0000000000000400);
+        roundMask = BX_CONST64(0x00000000000007FF);
+    }
+    else if (roundingPrecision == 32) {
+        roundIncrement = BX_CONST64(0x0000008000000000);
+        roundMask = BX_CONST64(0x000000FFFFFFFFFF);
+    }
+    else goto precision80;
+
+    zSig0 |= (zSig1 != 0);
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask;
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    if (0x7FFD <= (Bit32u) (zExp - 1)) {
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0))) 
+        {
+            goto overflow;
+        }
+        if (zExp <= 0) {
+            int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);
+            shift64RightJamming(zSig0, 1 - zExp, &zSig0);
+            zExp = 0;
+            roundBits = zSig0 & roundMask;
+            if (isTiny && roundBits) float_raise(status, float_flag_underflow);
+            if (roundBits) float_raise(status, float_flag_inexact);
+            zSig0 += roundIncrement;
+            if ((Bit64s) zSig0 < 0) zExp = 1;
+            roundIncrement = roundMask + 1;
+            if (roundNearestEven && (roundBits<<1 == roundIncrement))
+                roundMask |= roundIncrement;
+            zSig0 &= ~roundMask;
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    zSig0 += roundIncrement;
+    if (zSig0 < roundIncrement) {
+        ++zExp;
+        zSig0 = BX_CONST64(0x8000000000000000);
+    }
+    roundIncrement = roundMask + 1;
+    if (roundNearestEven && (roundBits<<1 == roundIncrement))
+        roundMask |= roundIncrement;
+    zSig0 &= ~roundMask;
+    if (zSig0 == 0) zExp = 0;
+    return packFloatx80(zSign, zExp, zSig0);
+ precision80:
+    increment = ((Bit64s) zSig1 < 0);
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) increment = 0;
+        else {
+            if (zSign) {
+                increment = (roundingMode == float_round_down) && zSig1;
+            }
+            else {
+                increment = (roundingMode == float_round_up) && zSig1;
+            }
+        }
+    }
+    if (0x7FFD <= (Bit32u) (zExp - 1)) {
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE)
+                  && (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
+                  && increment)) 
+        {
+            roundMask = 0;
+ overflow:
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            if ((roundingMode == float_round_to_zero)
+                 || (zSign && (roundingMode == float_round_up))
+                 || (! zSign && (roundingMode == float_round_down))) 
+            {
+                return packFloatx80(zSign, 0x7FFE, ~roundMask);
+            }
+
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        if (zExp <= 0) {
+            int isTiny = (zExp < 0) || (! increment)
+                || (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF));
+            shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
+            zExp = 0;
+            if (isTiny && zSig1) float_raise(status, float_flag_underflow);
+            if (zSig1) float_raise(status, float_flag_inexact);
+            if (roundNearestEven) 
+                increment = ((Bit64s) zSig1 < 0);
+            else {
+                if (zSign) {
+                    increment = (roundingMode == float_round_down) && zSig1;
+                } else {
+                    increment = (roundingMode == float_round_up) && zSig1;
+                }
+            }
+            if (increment) {
+                ++zSig0;
+                zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
+                if ((Bit64s) zSig0 < 0) zExp = 1;
+            }
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (zSig1) float_raise(status, float_flag_inexact);
+    if (increment) {
+        ++zSig0;
+        if (zSig0 == 0) {
+            ++zExp;
+            zSig0 = BX_CONST64(0x8000000000000000);
+        }
+        else {
+            zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);
+        }
+    }
+    else {
+        if (zSig0 == 0) zExp = 0;
+    }
+    return packFloatx80(zSign, zExp, zSig0);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision, 
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
+{
+    if (zSig0 == 0) {
+        zSig0 = zSig1;
+        zSig1 = 0;
+        zExp -= 64;
+    }
+    int shiftCount = countLeadingZeros64(zSig0);
+    shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
+    zExp -= shiftCount;
+    return
+        roundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat128Subnormal(
+     Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr)
+{
+    int shiftCount;
+
+    if (aSig0 == 0) {
+        shiftCount = countLeadingZeros64(aSig1) - 15;
+        if (shiftCount < 0) {
+            *zSig0Ptr = aSig1 >>(-shiftCount);
+            *zSig1Ptr = aSig1 << (shiftCount & 63);
+        }
+        else {
+            *zSig0Ptr = aSig1 << shiftCount;
+            *zSig1Ptr = 0;
+        }
+        *zExpPtr = - shiftCount - 63;
+    }
+    else {
+        shiftCount = countLeadingZeros64(aSig0) - 15;
+        shortShift128Left(aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr);
+        *zExpPtr = 1 - shiftCount;
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 roundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, float_status_t &status)
+{
+    int increment = ((Bit64s) zSig2 < 0);
+    if (0x7FFD <= (Bit32u) zExp) {
+        if ((0x7FFD < zExp)
+             || ((zExp == 0x7FFD)
+                  && eq128(BX_CONST64(0x0001FFFFFFFFFFFF),
+                         BX_CONST64(0xFFFFFFFFFFFFFFFF), zSig0, zSig1)
+                  && increment)) 
+        {
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            return packFloat128(zSign, 0x7FFF, 0, 0);
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1)
+                || ! increment
+                || lt128(zSig0, zSig1,
+                       BX_CONST64(0x0001FFFFFFFFFFFF),
+                       BX_CONST64(0xFFFFFFFFFFFFFFFF));
+            shift128ExtraRightJamming(
+                zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2);
+            zExp = 0;
+            if (isTiny && zSig2) float_raise(status, float_flag_underflow);
+            increment = ((Bit64s) zSig2 < 0);
+        }
+    }
+    if (zSig2) float_raise(status, float_flag_inexact);
+    if (increment) {
+        add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1);
+        zSig1 &= ~((zSig2 + zSig2 == 0) & 1);
+    }
+    else {
+        if ((zSig0 | zSig1) == 0) zExp = 0;
+    }
+    return packFloat128(zSign, zExp, zSig0, zSig1);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+float128 normalizeRoundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
+{
+    Bit64u zSig2;
+
+    if (zSig0 == 0) {
+        zSig0 = zSig1;
+        zSig1 = 0;
+        zExp -= 64;
+    }
+    int shiftCount = countLeadingZeros64(zSig0) - 15;
+    if (0 <= shiftCount) {
+        zSig2 = 0;
+        shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
+    }
+    else {
+        shift128ExtraRightJamming(
+            zSig0, zSig1, 0, -shiftCount, &zSig0, &zSig1, &zSig2);
+    }
+    zExp -= shiftCount;
+    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
+}
+
+#endif
--- a/bochs/fpu/softfloat-round-pack.h
+++ b/bochs/fpu/softfloat-round-pack.h
@ -0,0 +1,260 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#ifndef _SOFTFLOAT_ROUND_PACK_H_
+#define _SOFTFLOAT_ROUND_PACK_H_
+
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status);
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 roundAndPackFloatx80(int roundingPrecision, 
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision, 
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
+
+#endif // FLOATX80
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat128Subnormal(
+     Bit64u aSig0, Bit64u aSig1, Bit32s *zExpPtr, Bit64u *zSig0Ptr, Bit64u *zSig1Ptr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 roundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, Bit64u zSig2, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+float128 normalizeRoundAndPackFloat128(
+     int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
+
+#endif // FLOAT128
+
+#endif
--- a/bochs/fpu/softfloat-specialize.cc
+++ b/bochs/fpu/softfloat-specialize.cc
@ -0,0 +1,195 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloat.h"
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status)
+{
+    int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+
+    aIsNaN = float32_is_nan(a);
+    aIsSignalingNaN = float32_is_signaling_nan(a);
+    bIsNaN = float32_is_nan(b);
+    bIsSignalingNaN = float32_is_signaling_nan(b);
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+    if (get_float_nan_handling_mode(status) == float_larger_significand_nan) {
+        if (aIsSignalingNaN) {
+            if (bIsSignalingNaN) goto returnLargerSignificand;
+            return bIsNaN ? b : a;
+        }
+        else if (aIsNaN) {
+            if (bIsSignalingNaN | ! bIsNaN) return a;
+      returnLargerSignificand:
+            if ((Bit32u) (a<<1) < (Bit32u) (b<<1)) return b;
+            if ((Bit32u) (b<<1) < (Bit32u) (a<<1)) return a;
+            return (a < b) ? a : b;
+        }
+        else {
+            return b;
+        }
+    } else {
+        return (aIsSignalingNaN | aIsNaN) ? a : b;
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float64 propagateFloat64NaN(float64 a, float64 b, float_status_t &status)
+{
+    int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+    aIsNaN = float64_is_nan(a);
+    aIsSignalingNaN = float64_is_signaling_nan(a);
+    bIsNaN = float64_is_nan(b);
+    bIsSignalingNaN = float64_is_signaling_nan(b);
+    a |= BX_CONST64(0x0008000000000000);
+    b |= BX_CONST64(0x0008000000000000);
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+    if (get_float_nan_handling_mode(status) == float_larger_significand_nan) {
+        if (aIsSignalingNaN) {
+            if (bIsSignalingNaN) goto returnLargerSignificand;
+            return bIsNaN ? b : a;
+        }
+        else if (aIsNaN) {
+            if (bIsSignalingNaN | ! bIsNaN) return a;
+      returnLargerSignificand:
+            if ((Bit64u) (a<<1) < (Bit64u) (b<<1)) return b;
+            if ((Bit64u) (b<<1) < (Bit64u) (a<<1)) return a;
+            return (a < b) ? a : b;
+        }
+        else {
+            return b;
+        }
+    } else {
+        return (aIsSignalingNaN | aIsNaN) ? a : b;
+    }
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status_t &status)
+{
+    int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+    aIsNaN = floatx80_is_nan(a);
+    aIsSignalingNaN = floatx80_is_signaling_nan(a);
+    bIsNaN = floatx80_is_nan(b);
+    bIsSignalingNaN = floatx80_is_signaling_nan(b);
+    a.fraction |= BX_CONST64(0xC000000000000000);
+    b.fraction |= BX_CONST64(0xC000000000000000);
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+    if (aIsSignalingNaN) {
+        if (bIsSignalingNaN) goto returnLargerSignificand;
+        return bIsNaN ? b : a;
+    }
+    else if (aIsNaN) {
+        if (bIsSignalingNaN | ! bIsNaN) return a;
+ returnLargerSignificand:
+        if (a.fraction < b.fraction) return b;
+        if (b.fraction < a.fraction) return a;
+        return (a.exp < b.exp) ? a : b;
+    }
+    else {
+        return b;
+    }
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+const floatx80 floatx80_default_nan = 
+    packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float128 propagateFloat128NaN(float128 a, float128 b, float_status_t &status)
+{
+    int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+    aIsNaN = float128_is_nan(a);
+    aIsSignalingNaN = float128_is_signaling_nan(a);
+    bIsNaN = float128_is_nan(b);
+    bIsSignalingNaN = float128_is_signaling_nan(b);
+    a.hi |= BX_CONST64(0x0000800000000000);
+    b.hi |= BX_CONST64(0x0000800000000000);
+    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
+    if (aIsSignalingNaN) {
+        if (bIsSignalingNaN) goto returnLargerSignificand;
+        return bIsNaN ? b : a;
+    }
+    else if (aIsNaN) {
+        if (bIsSignalingNaN | !bIsNaN) return a;
+ returnLargerSignificand:
+        if (lt128(a.hi<<1, a.lo, b.hi<<1, b.lo)) return b;
+        if (lt128(b.hi<<1, b.lo, a.hi<<1, a.lo)) return a;
+        return (a.hi < b.hi) ? a : b;
+    }
+    else {
+        return b;
+    }
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+const float128 float128_default_nan = 
+    packFloat128(float128_default_nan_hi, float128_default_nan_lo);
+
+#endif /* FLOAT128 */
--- a/bochs/fpu/softfloat-specialize.h
+++ b/bochs/fpu/softfloat-specialize.h
@ -27,6 +27,9 @@ the work is derivative, and (2) the source code includes prominent notice with
 these four paragraphs for those parts of this code that are retained.
 =============================================================================*/

+#ifndef _SOFTFLOAT_SPECIALIZE_H_
+#define _SOFTFLOAT_SPECIALIZE_H_
+
 /*============================================================================
 * Adapted for Bochs (x86 achitecture simulator) by
 *            Stanislav Shwartsman (gate at fidonet.org.il)
@ -49,9 +52,6 @@ typedef struct {
 | The pattern for a default generated single-precision NaN.
 *----------------------------------------------------------------------------*/
 #define float32_default_nan 0xFFC00000
-/*
-#define float32_default_nan 0x7FFFFFFF
-*/

 #define float32_fraction extractFloat32Frac
 #define float32_exp extractFloat32Exp
@ -146,6 +146,14 @@ BX_CPP_INLINE float32 commonNaNToFloat32(commonNaNT a)
    return (((Bit32u) a.sign)<<31) | 0x7FC00000 | (a.hi>>41);
 }

+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status);
+
 /*----------------------------------------------------------------------------
 | Takes single-precision floating-point NaN `a' and returns the appropriate 
 | NaN result.  If `a' is a signaling NaN, the invalid exception is raised.
@ -159,50 +167,10 @@ BX_CPP_INLINE float32 propagateFloat32NaN(float32 a, float_status_t &status)
    return a | 0x00400000;
 }

-/*----------------------------------------------------------------------------
-| Takes two single-precision floating-point values `a' and `b', one of which
-| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
-| signaling NaN, the invalid exception is raised.
-*----------------------------------------------------------------------------*/
-
-static float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status)
-{
-    int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
-
-    aIsNaN = float32_is_nan(a);
-    aIsSignalingNaN = float32_is_signaling_nan(a);
-    bIsNaN = float32_is_nan(b);
-    bIsSignalingNaN = float32_is_signaling_nan(b);
-    a |= 0x00400000;
-    b |= 0x00400000;
-    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
-    if (get_float_nan_handling_mode(status) == float_larger_significand_nan) {
-        if (aIsSignalingNaN) {
-            if (bIsSignalingNaN) goto returnLargerSignificand;
-            return bIsNaN ? b : a;
-        }
-        else if (aIsNaN) {
-            if (bIsSignalingNaN | ! bIsNaN) return a;
-      returnLargerSignificand:
-            if ((Bit32u) (a<<1) < (Bit32u) (b<<1)) return b;
-            if ((Bit32u) (b<<1) < (Bit32u) (a<<1)) return a;
-            return (a < b) ? a : b;
-        }
-        else {
-            return b;
-        }
-    } else {
-        return (aIsSignalingNaN | aIsNaN) ? a : b;
-    }
-}
-
 /*----------------------------------------------------------------------------
 | The pattern for a default generated double-precision NaN.
 *----------------------------------------------------------------------------*/
 #define float64_default_nan BX_CONST64(0xFFF8000000000000)
-/*
-#define float64_default_nan BX_CONST64(0x7FFFFFFFFFFFFFFF)
-*/

 #define float64_fraction extractFloat64Frac
 #define float64_exp extractFloat64Exp
@ -297,6 +265,14 @@ BX_CPP_INLINE float64 commonNaNToFloat64(commonNaNT a)
    return (((Bit64u) a.sign)<<63) | BX_CONST64(0x7FF8000000000000) | (a.hi>>12);
 }

+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float64 propagateFloat64NaN(float64 a, float64 b, float_status_t &status);
+
 /*----------------------------------------------------------------------------
 | Takes double-precision floating-point NaN `a' and returns the appropriate 
 | NaN result.  If `a' is a signaling NaN, the invalid exception is raised.
@ -310,43 +286,6 @@ BX_CPP_INLINE float64 propagateFloat64NaN(float64 a, float_status_t &status)
    return a | BX_CONST64(0x0008000000000000);
 }

-/*----------------------------------------------------------------------------
-| Takes two double-precision floating-point values `a' and `b', one of which
-| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
-| signaling NaN, the invalid exception is raised.
-*----------------------------------------------------------------------------*/
-
-static float64 propagateFloat64NaN(float64 a, float64 b, float_status_t &status)
-{
-    int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
-
-    aIsNaN = float64_is_nan(a);
-    aIsSignalingNaN = float64_is_signaling_nan(a);
-    bIsNaN = float64_is_nan(b);
-    bIsSignalingNaN = float64_is_signaling_nan(b);
-    a |= BX_CONST64(0x0008000000000000);
-    b |= BX_CONST64(0x0008000000000000);
-    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
-    if (get_float_nan_handling_mode(status) == float_larger_significand_nan) {
-        if (aIsSignalingNaN) {
-            if (bIsSignalingNaN) goto returnLargerSignificand;
-            return bIsNaN ? b : a;
-        }
-        else if (aIsNaN) {
-            if (bIsSignalingNaN | ! bIsNaN) return a;
-      returnLargerSignificand:
-            if ((Bit64u) (a<<1) < (Bit64u) (b<<1)) return b;
-            if ((Bit64u) (b<<1) < (Bit64u) (a<<1)) return a;
-            return (a < b) ? a : b;
-        }
-        else {
-            return b;
-        }
-    } else {
-        return (aIsSignalingNaN | aIsNaN) ? a : b;
-    }
-}
-
 #ifdef FLOATX80

 /*----------------------------------------------------------------------------
@ -356,15 +295,13 @@ static float64 propagateFloat64NaN(float64 a, float64 b, float_status_t &status)
 *----------------------------------------------------------------------------*/
 #define floatx80_default_nan_exp 0xFFFF
 #define floatx80_default_nan_fraction BX_CONST64(0xC000000000000000)
-/*
-#define floatx80_default_nan_exp 0x7FFF
-#define floatx80_default_nan_fraction BX_CONST64(0xFFFFFFFFFFFFFFFF)
-*/

 #define floatx80_fraction extractFloatx80Frac
 #define floatx80_exp extractFloatx80Exp
 #define floatx80_sign extractFloatx80Sign

+#define EXP_BIAS 0x3FFF
+
 /*----------------------------------------------------------------------------
 | Returns the fraction bits of the extended double-precision floating-point
 | value `a'.
@ -404,7 +341,7 @@ BX_CPP_INLINE floatx80 packFloatx80(int zSign, Bit32s zExp, Bit64u zSig)
 {
    floatx80 z;
    z.fraction = zSig;
-    z.exp = (((Bit16u) zSign)<<15) + zExp;
+    z.exp = (zSign << 15) + zExp;
    return z;
 }

@ -425,14 +362,24 @@ BX_CPP_INLINE int floatx80_is_nan(floatx80 a)

 BX_CPP_INLINE int floatx80_is_signaling_nan(floatx80 a)
 {
-    Bit64s aLow = a.fraction & ~BX_CONST64(0x4000000000000000);
+    Bit64u aLow = a.fraction & ~BX_CONST64(0x4000000000000000);
    return ((a.exp & 0x7FFF) == 0x7FFF) &&
-            (Bit64s) (aLow<<1) && (a.fraction == aLow);
+            ((Bit64u) (aLow<<1)) && (a.fraction == aLow);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is an
+| unsupported; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_unsupported(floatx80 a)
+{
+    return ((a.exp & 0x7FFF) && !(a.fraction & BX_CONST64(0x8000000000000000)));
 }

 /*----------------------------------------------------------------------------
 | Returns the result of converting the extended double-precision floating-
-| point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
+| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
 | invalid exception is raised.
 *----------------------------------------------------------------------------*/

@ -459,6 +406,14 @@ BX_CPP_INLINE floatx80 commonNaNToFloatx80(commonNaNT a)
    return z;
 }

+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status_t &status);
+
 /*----------------------------------------------------------------------------
 | Takes extended double-precision floating-point  NaN  `a' and returns the 
 | appropriate NaN result. If `a' is a signaling NaN, the invalid exception 
@ -476,36 +431,161 @@ BX_CPP_INLINE floatx80 propagateFloatx80NaN(floatx80 a, float_status_t &status)
 }

 /*----------------------------------------------------------------------------
-| Takes two extended double-precision floating-point values `a' and `b', one
-| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const floatx80 floatx80_default_nan;
+
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN. The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_hi BX_CONST64(0xFFFF800000000000)
+#define float128_default_nan_lo BX_CONST64(0x0000000000000000)
+
+#define float128_exp extractFloat128Exp
+
+/*----------------------------------------------------------------------------
+| Returns the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloat128Frac1(float128 a)
+{
+    return a.lo;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloat128Frac0(float128 a)
+{
+    return a.hi & BX_CONST64(0x0000FFFFFFFFFFFF);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the quadruple-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32s extractFloat128Exp(float128 a)
+{
+    return (a.hi>>48) & 0x7FFF;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat128Sign(float128 a)
+{
+    return a.hi >> 63;
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result.  After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 32 bits of the result.  This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 packFloat128(int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1)
+{
+    float128 z;
+    z.lo = zSig1;
+    z.hi = (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<48) + zSig0;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Packs two 64-bit precision integers into into the quadruple-precision 
+| floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 packFloat128(Bit64u zHi, Bit64u zLo)
+{
+    float128 z;
+    z.lo = zLo;
+    z.hi = zHi;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
+| otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float128_is_nan(float128 a)
+{
+    return (BX_CONST64(0xFFFE000000000000) <= (Bit64u) (a.hi<<1))
+        && (a.lo || (a.hi & BX_CONST64(0x0000FFFFFFFFFFFF)));
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float128_is_signaling_nan(float128 a)
+{
+    return (((a.hi>>47) & 0xFFFF) == 0xFFFE)
+        && (a.lo || (a.hi & BX_CONST64(0x00007FFFFFFFFFFF)));
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float128ToCommonNaN(float128 a, float_status_t &status)
+{
+    commonNaNT z;
+    if (float128_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    z.sign = a.hi>>63;
+    shortShift128Left(a.hi, a.lo, 16, &z.hi, &z.lo);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float128 commonNaNToFloat128(commonNaNT a)
+{
+    float128 z;
+    shift128Right(a.hi, a.lo, 16, &z.hi, &z.lo);
+    z.hi |= (((Bit64u) a.sign)<<63) | BX_CONST64(0x7FFF800000000000);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
 | `b' is a signaling NaN, the invalid exception is raised.
 *----------------------------------------------------------------------------*/

-static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status_t &status)
-{
-    int aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
+float128 propagateFloat128NaN(float128 a, float128 b, float_status_t &status);

-    aIsNaN = floatx80_is_nan(a);
-    aIsSignalingNaN = floatx80_is_signaling_nan(a);
-    bIsNaN = floatx80_is_nan(b);
-    bIsSignalingNaN = floatx80_is_signaling_nan(b);
-    a.fraction |= BX_CONST64(0xC000000000000000);
-    b.fraction |= BX_CONST64(0xC000000000000000);
-    if (aIsSignalingNaN | bIsSignalingNaN) float_raise(status, float_flag_invalid);
-    if (aIsSignalingNaN) {
-        if (bIsSignalingNaN) goto returnLargerSignificand;
-        return bIsNaN ? b : a;
-    }
-    else if (aIsNaN) {
-        if (bIsSignalingNaN | ! bIsNaN) return a;
- returnLargerSignificand:
-        if (a.fraction < b.fraction) return b;
-        if (b.fraction < a.fraction) return a;
-        return (a.exp < b.exp) ? a : b;
-    }
-    else {
-        return b;
-    }
-}
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* FLOAT128 */

 #endif
--- a/bochs/fpu/softfloat.cc
+++ b/bochs/fpu/softfloat.cc
--- a/bochs/fpu/softfloat.h
+++ b/bochs/fpu/softfloat.h
@ -34,8 +34,8 @@ these four paragraphs for those parts of this code that are retained.

 #include <config.h>      /* generated by configure script from config.h.in */

-#ifndef SOFTFLOAT_H
-#define SOFTFLOAT_H
+#ifndef _SOFTFLOAT_H_
+#define _SOFTFLOAT_H_

 #define FLOATX80

@ -57,14 +57,6 @@ typedef enum {
    float_normalized
 } float_class_t;

-/*----------------------------------------------------------------------------
-| Software IEC/IEEE floating-point underflow tininess-detection mode.
-*----------------------------------------------------------------------------*/
-enum {
-    float_tininess_after_rounding  = 0,
-    float_tininess_before_rounding = 1
-};
-
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE floating-point NaN operands handling mode.
 *----------------------------------------------------------------------------*/
@ -112,14 +104,13 @@ struct float_status_t
 {
 #ifdef FLOATX80
    int float_rounding_precision;	/* floatx80 only */
+    int float_precision_lost_up;	/* flag register, floatx80 only */
 #endif
-    int float_detect_tininess;
    int float_rounding_mode;
    int float_exception_flags;
-    int float_nan_handling_mode;
+    int float_nan_handling_mode;	/* flag register */
    int flush_underflow_to_zero;	/* flag register */
 };
-typedef struct float_status_t softfloat_status_word_t;

 /*----------------------------------------------------------------------------
 | Routine to raise any or all of the software IEC/IEEE floating-point
@ -250,16 +241,38 @@ int float64_is_signaling_nan(float64);
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE floating-point types.
 *----------------------------------------------------------------------------*/
+
+// Endian  Host byte order         Guest (x86) byte order
+// ======================================================
+// Little  FFFFFFFFEEAAAAAA        FFFFFFFFEEAAAAAA
+// Big     AAAAAAEEFFFFFFFF        FFFFFFFFEEAAAAAA
+//
+// Legend: F - fraction/mmx
+//         E - exponent
+//         A - alignment
+
 #ifdef BX_BIG_ENDIAN
-struct floatx80 {	// do not allow 16-byte extension of the structure
+#if defined(__MWERKS__) && defined(macintosh)
+#pragma options align=mac68k
+#endif
+struct floatx80 {
+    Bit16u align1;
+    Bit16u align2;
+    Bit16u align3;
    Bit16u exp;
    Bit64u fraction;
-} GCC_ATTRIBUTE((aligned(1), packed));
+} GCC_ATTRIBUTE((aligned(16), packed));
+#if defined(__MWERKS__) && defined(macintosh)
+#pragma options align=reset
+#endif
 #else
-struct floatx80 {	// do not allow 16-byte extension of the structure
+struct floatx80 {
    Bit64u fraction;
    Bit16u exp;
-} GCC_ATTRIBUTE((aligned(1), packed));
+    Bit16u align1;
+    Bit16u align2;
+    Bit16u align3;
+} GCC_ATTRIBUTE((aligned(16), packed));
 #endif

 /*----------------------------------------------------------------------------
@ -298,17 +311,37 @@ floatx80 floatx80_sub(floatx80, floatx80, float_status_t &status);
 floatx80 floatx80_mul(floatx80, floatx80, float_status_t &status);
 floatx80 floatx80_div(floatx80, floatx80, float_status_t &status);
 floatx80 floatx80_sqrt(floatx80, float_status_t &status);
-int floatx80_eq(floatx80, floatx80, float_status_t &status);
-int floatx80_le(floatx80, floatx80, float_status_t &status);
-int floatx80_lt(floatx80, floatx80, float_status_t &status);
-int floatx80_eq_signaling(floatx80, floatx80, float_status_t &status);
-int floatx80_le_quiet(floatx80, floatx80, float_status_t &status);
-int floatx80_lt_quiet(floatx80, floatx80, float_status_t &status);
-int floatx80_compare(floatx80, floatx80, float_status_t &status);
-int floatx80_compare_quiet(floatx80, floatx80, float_status_t &status);

-float_class_t floatx80_class(floatx80);
 int floatx80_is_signaling_nan(floatx80);
+
+#endif  /* FLOATX80 */
+
+#ifdef FLOAT128
+
+#ifdef BX_BIG_ENDIAN
+struct float128 {
+    Bit64u hi, lo;
+};
+#else
+struct float128 {
+    Bit64u lo, hi;
+};
 #endif

+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+float128 floatx80_to_float128(floatx80 a, float_status_t &status);
+floatx80 float128_to_floatx80(float128 a, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_add(float128 a, float128 b, float_status_t &status);
+float128 float128_sub(float128 a, float128 b, float_status_t &status);
+float128 float128_mul(float128 a, float128 b, float_status_t &status);
+float128 float128_div(float128 a, float128 b, float_status_t &status);
+
+#endif  /* FLOAT128 */
+
 #endif
--- a/bochs/fpu/softfloatx80.cc
+++ b/bochs/fpu/softfloatx80.cc
@ -0,0 +1,340 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 16-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic - which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN or the 
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16(floatx80 a, float_status_t &status)
+{
+   if (floatx80_is_unsupported(a))
+   {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+   }
+
+   Bit32s v32 = floatx80_to_int32(a, status);
+
+   if ((v32 > (Bit32s) BX_MAX_BIT16S) || (v32 < (Bit32s) BX_MIN_BIT16S))
+   {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+   }
+
+   return (Bit16s) v32;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 16-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN or the conversion overflows, the integer 
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16_round_to_zero(floatx80 a, float_status_t &status)
+{
+   if (floatx80_is_unsupported(a))
+   {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+   }
+
+   Bit32s v32 = floatx80_to_int32_round_to_zero(a, status);
+
+   if ((v32 > (Bit32s) BX_MAX_BIT16S) || (v32 < (Bit32s) BX_MIN_BIT16S))
+   {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+   }
+
+   return (Bit16s) v32;
+}
+
+/*----------------------------------------------------------------------------
+| Separate the source extended double-precision floating point value `a'
+| into its exponent and significand, store the significant back to the
+| 'a' and return the exponent. The operation performed is a superset of 
+| the IEC/IEEE recommended logb(x) function.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_extract(floatx80 &a, float_status_t &status)
+{
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int   aSign = extractFloatx80Sign(a);
+
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        a = floatx80_default_nan;
+        return a;
+    }
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1)) 
+        {
+            float_raise(status, float_flag_invalid);
+            a = propagateFloatx80NaN(a, status);
+            return a;
+        }
+        return packFloatx80(0, 0x7FFF, BX_CONST64(0x8000000000000000));;
+    }
+    if (aExp == 0)
+    {
+        if (aSig == 0) {
+            float_raise(status, float_flag_divbyzero);
+            a = packFloatx80(aSign, 0, 0);
+            return packFloatx80(1, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+
+    a.exp = (aSign << 15) + 0x3FFF;
+    a.fraction = aSig;
+    return int32_to_floatx80(aExp - 0x3FFF);
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by 
+| value `b'. The function truncates the value in the second operand 'b' to 
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation performed according to the IEC/IEEE Standard for Binary 
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status)
+{
+    Bit32s aExp, bExp;
+    Bit64u aSig, bSig;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if ((bExp == 0x7FFF) && bSign) {
+            float_raise(status, float_flag_invalid);
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a;
+    }
+    if (bExp == 0x7FFF) {
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if ((aExp | aSig) == 0) {
+            if (! bSign) {
+                float_raise(status, float_flag_invalid);
+                return floatx80_default_nan;
+            }
+            return a;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        if (bSign) return packFloatx80(aSign, 0, 0);
+        return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+
+    if (bExp > 0x400E) {   
+        /* generate appropriate overflow/underflow */
+        return roundAndPackFloatx80(80, aSign, 
+                          bSign ? -0x3FFF : 0x7FFF, aSig, 0, status);
+    }
+    if (bExp < 0x3FFF) return a;
+
+    int shiftCount = 0x403E - bExp;
+    bSig >>= shiftCount;
+    Bit32s scale = bSig;
+    if (bSign) scale = -scale; /* -32768..32767 */
+    return
+        roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status);
+}
+
+/*----------------------------------------------------------------------------
+| Determine extended-precision floating-point number class.
+*----------------------------------------------------------------------------*/
+
+float_class_t floatx80_class(floatx80 a)
+{
+   Bit32s aExp = extractFloatx80Exp(a);
+   Bit64u aSig = extractFloatx80Frac(a);
+
+   if(aExp == 0) {
+       if (aSig == 0)
+           return float_zero;
+
+       /* denormal or pseudo-denormal */
+       return float_denormal;
+   }
+
+   /* valid numbers have the MS bit set */
+   if (!(aSig & BX_CONST64(0x8000000000000000)))
+       return float_NaN; /* report unsupported as NaNs */
+
+   if(aExp == 0x7fff) {
+       int aSign = extractFloatx80Sign(a);
+
+       if (((Bit64u) (aSig<< 1)) == 0)
+           return (aSign) ? float_negative_inf : float_positive_inf;
+
+       return float_NaN;
+   }
+    
+   return float_normalized;
+}
+
+/*----------------------------------------------------------------------------
+| Compare  between  two extended precision  floating  point  numbers. Returns
+| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
+| the    value    'a'   is   less   than   the   corresponding   value   `b',
+| 'float_relation_greater' if the value 'a' is greater than the corresponding
+| value `b', or 'float_relation_unordered' otherwise. 
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80 a, floatx80 b, float_status_t &status)
+{
+    float_class_t aClass = floatx80_class(a);
+    float_class_t bClass = floatx80_class(b);
+
+    if (aClass == float_NaN || bClass == float_NaN)
+    {
+        float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if (aClass == float_denormal || bClass == float_denormal) 
+    {
+        float_raise(status, float_flag_denormal);
+    }
+
+    if ((a.fraction == b.fraction) && (a.exp == b.exp))
+    {
+        return float_relation_equal;
+    }
+
+    if (aClass == float_zero && bClass == float_zero)
+    {
+        return float_relation_equal;
+    }
+
+    int aSign = extractFloatx80Sign(a);
+    int bSign = extractFloatx80Sign(b);
+    if (aSign != bSign)
+        return (aSign) ? float_relation_less : float_relation_greater;
+
+    int less_than = 
+	aSign ? lt128(b.exp, b.fraction, a.exp, a.fraction)
+	      : lt128(a.exp, a.fraction, b.exp, b.fraction);
+
+    if (less_than) return float_relation_less;
+    return float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Compare  between  two extended precision  floating  point  numbers. Returns
+| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
+| the    value    'a'   is   less   than   the   corresponding   value   `b',
+| 'float_relation_greater' if the value 'a' is greater than the corresponding
+| value `b', or 'float_relation_unordered' otherwise. Quiet NaNs do not cause 
+| an exception.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status_t &status)
+{
+    float_class_t aClass = floatx80_class(a);
+    float_class_t bClass = floatx80_class(b);
+
+    if (aClass == float_NaN || bClass == float_NaN)
+    {
+        if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+            float_raise(status, float_flag_invalid);
+
+        if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b))
+            float_raise(status, float_flag_invalid);
+
+        return float_relation_unordered;
+    }
+
+    if (aClass == float_denormal || bClass == float_denormal) 
+    {
+        float_raise(status, float_flag_denormal);
+    }
+
+    if ((a.fraction == b.fraction) && (a.exp == b.exp))
+    {
+        return float_relation_equal;
+    }
+
+    if (aClass == float_zero && bClass == float_zero)
+    {
+        return float_relation_equal;
+    }
+
+    int aSign = extractFloatx80Sign(a);
+    int bSign = extractFloatx80Sign(b);
+    if (aSign != bSign)
+        return (aSign) ? float_relation_less : float_relation_greater;
+
+    int less_than = 
+	aSign ? lt128(b.exp, b.fraction, a.exp, a.fraction)
+	      : lt128(a.exp, a.fraction, b.exp, b.fraction);
+
+    if (less_than) return float_relation_less;
+    return float_relation_greater;
+}
--- a/bochs/fpu/softfloatx80.h
+++ b/bochs/fpu/softfloatx80.h
@ -0,0 +1,100 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#ifndef _SOFTFLOATX80_EXTENSIONS_H_
+#define _SOFTFLOATX80_EXTENSIONS_H_
+
+#include "softfloat.h"
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16(floatx80, float_status_t &status);
+Bit16s floatx80_to_int16_round_to_zero(floatx80, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+
+float_class_t floatx80_class(floatx80);
+floatx80 floatx80_extract(floatx80 &a, float_status_t &status);
+floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status);
+floatx80 floatx80_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status);
+floatx80 floatx80_ieee754_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status);
+floatx80 f2xm1(floatx80 a, float_status_t &status);
+floatx80 fyl2x(floatx80 a, floatx80 b, float_status_t &status);
+floatx80 fyl2xp1(floatx80 a, floatx80 b, float_status_t &status);
+floatx80 fpatan(floatx80 a, floatx80 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision trigonometric functions.
+*----------------------------------------------------------------------------*/
+
+int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status_t &status);
+int fsin(floatx80 &a, float_status_t &status);
+int fcos(floatx80 &a, float_status_t &status);
+int ftan(floatx80 &a, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision compare.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80, floatx80, float_status_t &status);
+int floatx80_compare_quiet(floatx80, floatx80, float_status_t &status);
+
+/*-----------------------------------------------------------------------------
+| Calculates the absolute value of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80& floatx80_abs(floatx80 &reg)
+{
+    reg.exp &= 0x7FFF;
+    return reg;   
+}
+
+/*-----------------------------------------------------------------------------
+| Changes the sign of the extended double-precision floating-point value 'a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary 
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80& floatx80_chs(floatx80 &reg)
+{
+    reg.exp ^= 0x8000;
+    return reg;   
+}
+
+/*-----------------------------------------------------------------------------
+| Commonly used extended double-precision floating-point constants.
+*----------------------------------------------------------------------------*/
+
+extern const floatx80 Const_Z;
+extern const floatx80 Const_1;
+
+#endif
--- a/bochs/fpu/status_w.h
+++ b/bochs/fpu/status_w.h
@ -1,61 +1,72 @@
-/*---------------------------------------------------------------------------+
- |  status_w.h                                                               |
- |  $Id: status_w.h,v 1.6 2003-07-31 21:07:38 sshwarts Exp $
- |                                                                           |
- | Copyright (C) 1992,1993                                                   |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
- |                                                                           |
- +---------------------------------------------------------------------------*/
+/////////////////////////////////////////////////////////////////////////
+//
+//   Copyright (c) 2004 Stanislav Shwartsman
+//          Written by Stanislav Shwartsman <gate at fidonet.org.il>
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//

 #ifndef _STATUS_H_
 #define _STATUS_H_

-#include "fpu_emu.h"    /* for definition of PECULIAR_486 */
+/* Status Word */
+#define FPU_SW_Backward		(0x8000)  /* backward compatibility */
+#define FPU_SW_C3	 	(0x4000)  /* condition bit 3 */
+#define FPU_SW_Top		(0x3800)  /* top of stack */
+#define FPU_SW_C2		(0x0400)  /* condition bit 2 */
+#define FPU_SW_C1		(0x0200)  /* condition bit 1 */
+#define FPU_SW_C0		(0x0100)  /* condition bit 0 */
+#define FPU_SW_Summary   	(0x0080)  /* exception summary */
+#define FPU_SW_Stack_Fault	(0x0040)  /* stack fault */
+#define FPU_SW_Precision   	(0x0020)  /* loss of precision */
+#define FPU_SW_Underflow   	(0x0010)  /* underflow */
+#define FPU_SW_Overflow    	(0x0008)  /* overflow */
+#define FPU_SW_Zero_Div    	(0x0004)  /* divide by zero */
+#define FPU_SW_Denormal_Op   	(0x0002)  /* denormalized operand */
+#define FPU_SW_Invalid    	(0x0001)  /* invalid operation */

-#define SW_Backward    	(0x8000)	/* backward compatibility */
-#define SW_C3		(0x4000)	/* condition bit 3 */
-#define SW_Top		(0x3800)	/* top of stack */
-#define SW_Top_Shift 	(11)		/* shift for top of stack bits */
-#define SW_C2		(0x0400)	/* condition bit 2 */
-#define SW_C1		(0x0200)	/* condition bit 1 */
-#define SW_C0		(0x0100)	/* condition bit 0 */
-#define SW_Summary     	(0x0080)	/* exception summary */
-#define SW_Stack_Fault	(0x0040)	/* stack fault */
-#define SW_Precision   	(0x0020)	/* loss of precision */
-#define SW_Underflow   	(0x0010)	/* underflow */
-#define SW_Overflow    	(0x0008)	/* overflow */
-#define SW_Zero_Div    	(0x0004)	/* divide by zero */
-#define SW_Denorm_Op   	(0x0002)	/* denormalized operand */
-#define SW_Invalid     	(0x0001)	/* invalid operation */
+#define FPU_SW_CC (FPU_SW_C0|FPU_SW_C1|FPU_SW_C2|FPU_SW_C3)

-#define SW_Exc_Mask     (0x27f)  /* Status word exception bit mask */
+#define FPU_SW_Exceptions_Mask  (0x027f)  /* status word exceptions bit mask */

-#define COMP_A_gt_B	1
-#define COMP_A_eq_B	2
-#define COMP_A_lt_B	3
-#define COMP_No_Comp	4
-#define COMP_Denormal   0x20
-#define COMP_NaN	0x40
-#define COMP_SNaN	0x80
+/* Exception flags: */
+#define FPU_EX_Precision	(0x0020)  /* loss of precision */
+#define FPU_EX_Underflow	(0x0010)  /* underflow */
+#define FPU_EX_Overflow		(0x0008)  /* overflow */
+#define FPU_EX_Zero_Div		(0x0004)  /* divide by zero */
+#define FPU_EX_Denormal		(0x0002)  /* denormalized operand */
+#define FPU_EX_Invalid		(0x0001)  /* invalid operation */

-#define status_word() \
-  ((FPU_partial_status & ~SW_Top & 0xffff) | ((FPU_tos << SW_Top_Shift) & SW_Top))
+/* Special exceptions: */
+#define FPU_EX_Stack_Overflow	(0x0041|FPU_SW_C1) 	/* stack overflow */
+#define FPU_EX_Stack_Underflow	(0x0041)		/* stack underflow */
+
+/* precision control */
+#define FPU_EX_Precision_Lost_Up 	(EX_Precision | SW_C1)
+#define FPU_EX_Precision_Lost_Dn        (EX_Precision)

 /*
 * bbd: use do {...} while (0) structure instead of using curly brackets
 * inside parens, which most compilers do not like.
 */
-#define setcc(cc) do { \
-  FPU_partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \
-  FPU_partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); } while(0)
+#define setcc(cc) do { 				\
+  FPU_PARTIAL_STATUS &= ~(FPU_SW_CC); 		\
+  FPU_PARTIAL_STATUS |= (cc) & FPU_SW_CC; 	\
+} while(0);

-#ifdef PECULIAR_486
-   /* Default, this conveys no information, but an 80486 does it. */
-   /* Clear the SW_C1 bit, "other bits undefined". */
-#  define clear_C1()  { FPU_partial_status &= ~SW_C1; }
-# else
-#  define clear_C1()
-#endif /* PECULIAR_486 */
+#define clear_C1() { FPU_PARTIAL_STATUS &= ~FPU_SW_C1; }
+#define clear_C2() { FPU_PARTIAL_STATUS &= ~FPU_SW_C2; }

-#endif /* _STATUS_H_ */
+#endif
--- a/bochs/fpu/tag_w.h
+++ b/bochs/fpu/tag_w.h
@ -0,0 +1,30 @@
+/////////////////////////////////////////////////////////////////////////
+//
+//   Copyright (c) 2004 Stanislav Shwartsman
+//          Written by Stanislav Shwartsman <gate at fidonet.org.il>
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef _TAG_W_H
+#define _TAG_W_H
+
+/* Tag Word */
+#define FPU_Tag_Valid   0x00
+#define FPU_Tag_Zero    0x01
+#define FPU_Tag_Special 0x02
+#define FPU_Tag_Empty   0x03
+
+#endif
--- a/bochs/fpu/todo
+++ b/bochs/fpu/todo
@ -0,0 +1,15 @@
+TODO:
+----
+
+1. Check for denormal and pseudodenormal operands in ALL instructions
+   I hope all instructions return the same values as real CPU.
+
+2. Unmasked underflow/overflow should correct the result by magic number
+   for all operations, including float32 and float64.
+
+3. Set SW_C1 according to PRECISION_UP or PRECISION_DOWN conditions.
+
+4. Elliminate floa128 use, Intel uses only 67-bit precision calculations
+   when float128 has at least 112-bit. Replacement of float128 with for
+   example 96-bit precision number could significantly speed up 
+   calculations.
--- a/bochs/fpu/wmFPUemu_glue.cc
+++ b/bochs/fpu/wmFPUemu_glue.cc
@ -1,289 +0,0 @@
-//  Copyright (C) 2001  MandrakeSoft S.A.
-//
-//    MandrakeSoft S.A.
-//    43, rue d'Aboukir
-//    75002 Paris - France
-//    http://www.linux-mandrake.com/
-//    http://www.mandrakesoft.com/
-//
-//  This library is free software; you can redistribute it and/or
-//  modify it under the terms of the GNU Lesser General Public
-//  License as published by the Free Software Foundation; either
-//  version 2 of the License, or (at your option) any later version.
-//
-//  This library is distributed in the hope that it will be useful,
-//  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-//  Lesser General Public License for more details.
-//
-//  You should have received a copy of the GNU Lesser General Public
-//  License along with this library; if not, write to the Free Software
-//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
-
-
-//
-// This is the glue logic needed to connect the wm-FPU-emu
-// FPU emulator written by Bill Metzenthen to bochs.
-//
-
-
-#include "bochs.h"
-#include <math.h>
-
-#if !BX_WITH_MACOS
-extern "C" {
-#endif
-
-#include "fpu_emu.h"
-#include "linux/signal.h"
-
-#if !BX_WITH_MACOS
-}
-#endif
-
-#define LOG_THIS genlog->
-#if BX_USE_CPU_SMF
-#define this (BX_CPU(0))
-#endif
-
-// Use this to hold a pointer to the instruction since
-// we can't pass this to the FPU emulation routines, which
-// will ultimately call routines here.
-static bxInstruction_c *fpu_iptr = NULL;
-static BX_CPU_C *fpu_cpu_ptr = NULL;
-
-i387_t *current_i387;
-
-extern "C" void
-math_emulate(fpu_addr_modes addr_modes,
-              u_char  FPU_modrm,
-              u_char byte1,
-              bx_address data_address,
-              struct address data_sel_off,
-              struct address entry_sel_off);
-
-
-  // This is called by bochs upon reset
-  void
-BX_CPU_C::fpu_init(void)
-{
-  current_i387 = &(BX_CPU_THIS_PTR the_i387);
-  finit();
-}
-
-  void
-BX_CPU_C::fpu_execute(bxInstruction_c *i)
-{
-  fpu_addr_modes addr_modes;
-  bx_address data_address;
-  struct address data_sel_off;
-  struct address entry_sel_off;
-  bx_bool is_32;
-
-  fpu_iptr = i;
-  fpu_cpu_ptr = this;
-  current_i387 = &(BX_CPU_THIS_PTR the_i387);
-
-  is_32 = BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].cache.u.segment.d_b;
-
-  if (protected_mode()) {
-    if (is_32) addr_modes.default_mode = SEG32;
-    else addr_modes.default_mode = PM16;
-    }
-  else if (v8086_mode()) {
-    addr_modes.default_mode = VM86;
-    }
-  else {
-    // real mode, use vm86 for now
-    addr_modes.default_mode = VM86;
-    }
-
-  // Mark if instruction used opsize or addrsize prefixes
-  if (i->as32B() == is_32)
-    addr_modes.override.address_size = 0;
-  else
-    addr_modes.override.address_size = ADDR_SIZE_PREFIX;
-
-  if (i->os32B() == is_32)
-    addr_modes.override.operand_size = 0;
-  else
-    addr_modes.override.operand_size = OP_SIZE_PREFIX;
-
-  // fill in orig eip here in offset
-  // fill in CS in selector
-  entry_sel_off.offset = BX_CPU_THIS_PTR prev_eip;
-  entry_sel_off.selector = BX_CPU_THIS_PTR sregs[BX_SEG_REG_CS].selector.value;
-
-// should set these fields to 0 if mem operand not used
-  data_address = RMAddr(i);
-  data_sel_off.offset = RMAddr(i);
-  data_sel_off.selector = BX_CPU_THIS_PTR sregs[i->seg()].selector.value;
-
-  math_emulate(addr_modes, i->modrm(), i->b1(), data_address,
-                data_sel_off, entry_sel_off);
-}
-
-void BX_CPU_C::print_state_FPU()
-{
-  static double sigh_scale_factor = pow(2.0, -31.0);
-  static double sigl_scale_factor = pow(2.0, -63.0);
-
-  Bit32u reg;
-  reg = i387.cwd;
-  fprintf(stderr, "cwd            0x%-8x\t%d\n", (unsigned) reg, (int) reg);
-  reg = i387.swd;
-  fprintf(stderr, "swd            0x%-8x\t%d\n", (unsigned) reg, (int) reg);
-  reg = i387.twd;
-  fprintf(stderr, "twd            0x%-8x\t%d\n", (unsigned) reg, (int) reg);
-  reg = i387.fip;
-  fprintf(stderr, "fip            0x%-8x\t%d\n", (unsigned) reg, (int) reg);
-  reg = i387.fcs;
-  fprintf(stderr, "fcs            0x%-8x\t%d\n", (unsigned) reg, (int) reg);
-  reg = i387.foo;
-  fprintf(stderr, "foo            0x%-8x\t%d\n", (unsigned) reg, (int) reg);
-  reg = i387.fos;
-  fprintf(stderr, "fos            0x%-8x\t%d\n", (unsigned) reg, (int) reg);
-  // print stack too
-  for (int i=0; i<8; i++) {
-    FPU_REG *fpr = &st(i);
-    double f1 = pow(2.0, ((0x7fff&fpr->exp) - EXTENDED_Ebias));
-    if (fpr->exp & SIGN_Negative) f1 = -f1;
-    double f2 = ((double)fpr->sigh * sigh_scale_factor);
-    double f3 = ((double)fpr->sigl * sigl_scale_factor);
-    double f = f1*(f2+f3);
-    fprintf(stderr, "st%d            %.10f (raw 0x%04x%08x%08x)\n", i, f, 0xffff&fpr->exp, fpr->sigh, fpr->sigl);
-  }
-}
-
-  unsigned
-fpu_get_ds(void)
-{
-  return(fpu_cpu_ptr->sregs[BX_SEG_REG_DS].selector.value);
-}
-
-  void
-fpu_set_ax(Bit16u val16)
-{
-  fpu_cpu_ptr->set_AX(val16);
-}
-
-  void
-fpu_set_eflags(Bit32u val32)
-{
-  fpu_cpu_ptr->writeEFlags(val32, 0xFFFFFFFF);
-}
-
-  Bit32u 
-fpu_get_eflags(void)
-{
-  return fpu_cpu_ptr->read_eflags();
-}
-
-  void BX_CPP_AttrRegparmN(3)
-fpu_verify_area(unsigned what, bx_address ptr, unsigned n)
-{
-  bx_segment_reg_t *seg;
-
-  seg = &fpu_cpu_ptr->sregs[fpu_iptr->seg()];
-
-  if (what == VERIFY_READ) {
-    fpu_cpu_ptr->read_virtual_checks(seg, ptr, n);
-    }
-  else {  // VERIFY_WRITE
-    fpu_cpu_ptr->write_virtual_checks(seg, ptr, n);
-    }
-}
-
-  Bit32u BX_CPP_AttrRegparmN(2)
-fpu_get_user(bx_address ptr, unsigned len)
-{
-  Bit32u val32;
-  Bit16u val16;
-  Bit8u  val8;
-
-  switch (len) {
-    case 1:
-      fpu_cpu_ptr->read_virtual_byte(fpu_iptr->seg(), ptr, &val8);
-      val32 = val8;
-      break;
-    case 2:
-      fpu_cpu_ptr->read_virtual_word(fpu_iptr->seg(), ptr, &val16);
-      val32 = val16;
-      break;
-    case 4:
-      fpu_cpu_ptr->read_virtual_dword(fpu_iptr->seg(), ptr, &val32);
-      break;
-    default:
-      BX_PANIC(("fpu_get_user: len=%u", len));
-    }
-  return(val32);
-}
-
-  void BX_CPP_AttrRegparmN(3)
-fpu_put_user(Bit32u val, bx_address ptr, unsigned len)
-{
-  Bit32u val32;
-  Bit16u val16;
-  Bit8u  val8;
-
-  switch (len) {
-    case 1:
-      val8 = val;
-      fpu_cpu_ptr->write_virtual_byte(fpu_iptr->seg(), ptr, &val8);
-      break;
-    case 2:
-      val16 = val;
-      fpu_cpu_ptr->write_virtual_word(fpu_iptr->seg(), ptr, &val16);
-      break;
-    case 4:
-      val32 = val;
-      fpu_cpu_ptr->write_virtual_dword(fpu_iptr->seg(), ptr, &val32);
-      break;
-    default:
-      BX_PANIC(("fpu_put_user: len=%u", len));
-    }
-}
-
-  void
-math_abort(void *info, unsigned int signal)
-{
-  UNUSED(info); // info is always passed NULL
-#if BX_CPU_LEVEL >= 4
-
-// values of signal:
-//   SIGILL  : opcodes which are illegal
-//   SIGFPE  : unmasked FP exception before WAIT or non-control instruction
-//   SIGSEGV : access data beyond segment violation
-  switch (signal) {
-    case SIGFPE:
-      if (fpu_cpu_ptr->cr0.ne == 0) {
-        // MSDOS compatibility external interrupt (IRQ13)
-        BX_INFO (("math_abort: MSDOS compatibility FPU exception"));
-
-        DEV_pic_raise_irq(13);
-        return;
-        }
-      fpu_cpu_ptr->exception(BX_MF_EXCEPTION, 0, 0);
-      // execution does not reach here
-
-    case SIGILL:
-      BX_INFO (("math_abort: SIGILL not implemented yet."));
-      fpu_cpu_ptr->UndefinedOpcode(fpu_iptr);
-      break;
-
-    case SIGSEGV:
-      BX_PANIC (("math_abort: SIGSEGV not implemented yet."));
-      break;
-    }
-
-#else
-  UNUSED(signal);
-  BX_INFO(("math_abort: CPU<4 not supported yet"));
-#endif
-}
-
-extern "C" int printk(const char * fmt, ...)
-{
-  BX_INFO(("math abort: %s", fmt));
-  return 0;
-}
--- a/bochs/fpu/wm_shrx.c
+++ b/bochs/fpu/wm_shrx.c
@ -1,152 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  wm_shrx.c                                                                |
- |  $Id: wm_shrx.c,v 1.2 2001-10-06 03:53:46 bdenney Exp $
- |                                                                           |
- | 64 bit right shift functions                                              |
- |                                                                           |
- | Copyright (C) 1992,1995,1999                                              |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- |   unsigned FPU_shrx(void *arg1, unsigned arg2)                            |
- |                                                                           |
- |   Extended shift right function.                                          |
- |   Fastest for small shifts.                                               |
- |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
- |   right by the number of bits specified by the second arg (arg2).         |
- |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
- |                [  64 bit arg ][ eax ]                                     |
- |            shift right  --------->                                        |
- |   The eax register is initialized to 0 before the shifting.               |
- |   Results returned in the 64 bit arg and eax.                             |
- +---------------------------------------------------------------------------*/
-
-
-#include "fpu_emu.h"
-
-unsigned FPU_shrx(void *arg1, u32 arg2)
-{
-  u32 x;
-
-  if ( arg2 >= 64 )
-    {
-      if ( arg2 >= 96 )
-	{
-	  *(u64 *)arg1 = 0;
-	  return 0;
-	}
-      arg2 -= 64;
-      x = (*(u64 *)arg1) >> 32;
-      *(u64 *)arg1 = 0;
-
-      if ( arg2 )
-	return x >> arg2;
-      else
-	return x;
-    }
-
-  if ( arg2 < 32 )
-    {
-      if ( arg2 == 0 )
-	return 0;
-
-      x = (*(u64 *)arg1) << (32 - arg2);
-    }
-  else if ( arg2 > 32 )
-    {
-      x = (*(u64 *)arg1) >> (arg2 - 32);
-    }
-  else
-    {
-      /* arg2 == 32 */
-      x = *(u64 *)arg1;
-    }
-
-  (*(u64 *)arg1) >>= arg2;
-
-  return x;
-
-}
-
-
-/*---------------------------------------------------------------------------+
- |   unsigned FPU_shrxs(void *arg1, unsigned arg2)                           |
- |                                                                           |
- |   Extended shift right function (optimized for small floating point       |
- |   integers).                                                              |
- |   Shifts the 64 bit quantity pointed to by the first arg (arg1)           |
- |   right by the number of bits specified by the second arg (arg2).         |
- |   Forms a 96 bit quantity from the 64 bit arg and eax:                    |
- |                [  64 bit arg ][ eax ]                                     |
- |            shift right  --------->                                        |
- |   The eax register is initialized to 0 before the shifting.               |
- |   The lower 8 bits of eax are lost and replaced by a flag which is        |
- |   set (to 0x01) if any bit, apart from the first one, is set in the       |
- |   part which has been shifted out of the arg.                             |
- |   Results returned in the 64 bit arg and eax.                             |
- +---------------------------------------------------------------------------*/
-
-unsigned FPU_shrxs(void *arg1, u32 arg2)
-{
-  u32 x, bits;
-  u64 lost;
-
-  if ( arg2 >= 64 )
-    {
-      if ( arg2 >= 96 )
-	{
-	  bits = *(u64 *)arg1 != 0;
-	  *(u64 *)arg1 = 0;
-	  return bits ? 1 : 0;
-	}
-      arg2 -= 64;
-      lost = (*(u64 *)arg1) << (32 - arg2);
-      x = (*(u64 *)arg1) >> 32;
-      *(u64 *)arg1 = 0;
-
-      if ( arg2 )
-	x >>= arg2;
-
-      if ( lost )
-	x |= 1;
-
-      return x;
-    }
-
-  if ( arg2 < 32 )
-    {
-      if ( arg2 == 0 )
-	/* No bits are lost */
-	return 0;
-
-      /* No bits are lost */
-      x = (*(u64 *)arg1) << (32 - arg2);
-    }
-  else if ( arg2 > 32 )
-    {
-      bits = (*(u64 *)arg1);
-      bits <<= (64 - arg2);
-      x = (*(u64 *)arg1) >> (arg2 - 32);
-      if ( bits )
-	x |= 1;
-    }
-  else
-    {
-      /* arg2 == 32 */
-      /* No bits are lost */
-      x = *(u64 *)arg1;
-    }
-
-  (*(u64 *)arg1) >>= arg2;
-
-  if ( x & 0x7fffffff )
-    x |= 1;
-
-  return x;
-
-}
-
--- a/bochs/fpu/wm_sqrt.c
+++ b/bochs/fpu/wm_sqrt.c
@ -1,335 +0,0 @@
-/*---------------------------------------------------------------------------+
- |  wm_sqrt.c                                                                |
- |  $Id: wm_sqrt.c,v 1.5 2003-10-05 12:26:11 sshwarts Exp $
- |                                                                           |
- | Fixed point arithmetic square root evaluation.                            |
- |                                                                           |
- | Copyright (C) 1992,1993,1995,1997,1999                                    |
- |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
- |                       Australia.  E-mail billm@melbpc.org.au              |
- |                                                                           |
- +---------------------------------------------------------------------------*/
-
-/*---------------------------------------------------------------------------+
- |    returns the square root of n in n.                                     |
- |                                                                           |
- |  Use Newton's method to compute the square root of a number, which must   |
- |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
- |  Does not check the sign or tag of the argument.                          |
- |  Sets the exponent, but not the sign or tag of the result.                |
- |                                                                           |
- |  The guess is kept in %esi:%edi                                           |
- +---------------------------------------------------------------------------*/
-
-#include "exception.h"
-#include "fpu_emu.h"
-
-/*
-  The following value indicates the trailing bits (of 96 bits)
-  which may be in error when the final Newton iteration is finished
-  (0x20 corresponds to the last 5 bits in error, i.e. 91 bits precision).
-  A check of the following code with more than 3 billion (3.0e9) random
-  and selected values showed that 0x10 was always a large enough value,
-  so 0x20 should be a conservative choice.
- */
-#define ERR_MARGIN 0x20
-
-
-int wm_sqrt(FPU_REG *n, u16 control_w, u8 sign)
-{
-  u64 nn, guess, halfn, lowr, mid, upr, diff, uwork;
-  s64 work;
-  u32 ne, guess32, work32, diff32, mid32;
-  int shifted;
-
-  nn = significand(n);
-  ne = 0;
-  if ( exponent16(n) == EXP_BIAS )
-    {
-      /* Shift the argument right one position. */
-      if ( nn & 1 )
-	ne = 0x80000000;
-      nn >>= 1;
-      guess = n->sigh >> 2;
-      shifted = 1;
-    }
-  else
-    {
-      guess = n->sigh >> 1;
-      shifted = 0;
-    }
-
-  guess += 0x40000000;
-  guess *= 0xaaaaaaaa;
-  guess <<= 1;
-  guess32 = guess >> 32;
-  if ( !(guess32 & 0x80000000) )
-    guess32 = 0x80000000;
-  halfn = nn >> 1;
-
-  guess32 = halfn / guess32 + (guess32 >> 1);
-  guess32 = halfn / guess32 + (guess32 >> 1);
-  guess32 = halfn / guess32 + (guess32 >> 1);
-
-/*
- * Now that an estimate accurate to about 30 bits has been obtained,
- * we improve it to 60 bits or so.
- *
- * The strategy from now on is to compute new estimates from
- *      guess := guess + (n - guess^2) / (2 * guess)
- */
-
-  work = guess32;
-  work = nn - work * guess32;
-  work <<= 28;       /* 29 - 1 */
-  work /= guess32;
-  work <<= 3;        /* 29 + 3 = 32 */
-  work += ((u64)guess32) << 32;
-
-  if ( work == 0 )  /* This happens in one or two special cases */
-    work = BX_CONST64(0xffffffffffffffff);
-
-  guess = work;
-
-  /* guess is now accurate to about 60 bits */
-
-  if ( work > 0 )
-    {
-#ifdef PARANOID
-      if ( (n->sigh != 0xffffffff) && (n->sigl != 0xffffffff) )
-	{
-	  INTERNAL(0x213);
-	}
-#endif
-      /* We know the answer here. */
-      return FPU_round(n, 0x7fffffff, control_w, sign);
-    }
-
-  /* Refine the guess to significantly more than 64 bits. */
-
-  /* First, square the current guess. */
-
-  guess32 = guess >> 32;
-  work32 = guess;
-
-  /* lower 32 times lower 32 */
-  lowr = work32;
-  lowr *= work32;
-
-  /* lower 32 times upper 32 */
-  mid = guess32;
-  mid *= work32;
-
-  /* upper 32 times upper 32 */
-  upr = guess32;
-  upr *= guess32;
-
-  /* upper 32 bits of the middle product times 2 */
-  upr += mid >> (32-1);
-
-  /* lower 32 bits of the middle product times 2 */
-  work32 = mid << 1;
-
-  /* upper 32 bits of the lower product */
-  mid32 = lowr >> 32;
-  mid32 += work32;
-  if ( mid32 < work32 )
-    upr ++;
-
-  /* We now have the first 96 bits (truncated) of the square of the guess */
-
-  diff = upr - nn;
-  diff32 = mid32 - ne;
-  if ( diff32 > mid32 )
-    diff --;
-
-  if ( ((s64)diff) < 0 )
-    {
-      /* The difference is negative, negate it. */
-      diff32 = -((s32)diff32);
-      diff = ~diff;
-      if ( diff32 == 0 )
-	diff ++;
-#ifdef PARANOID
-      if ( (diff >> 32) != 0 )
-	{
-	  INTERNAL(0x207);
-	}
-#endif
-
-      diff <<= 32;
-      diff |= diff32;
-      work32 = diff / guess32;
-      work = work32;
-      work <<= 32;
-
-      diff = diff % guess32;
-      diff <<= 32;
-      work32 = diff / guess32;
-
-      work |= work32;
-
-      work >>= 1;
-      work32 = work >> 32;
-
-
-      guess += work32;       /* The first 64 bits */
-      guess32 = work;        /* The next 32 bits */
-      /* The guess should now be good to about 90 bits */
-    }
-  else
-    {
-      /* The difference is positive. */
-      diff <<= 32;
-      diff |= diff32;
-
-      work32 = diff / guess32;
-      work = work32;
-      work <<= 32;
-
-      diff = diff % guess32;
-      diff <<= 32;
-      work32 = diff / guess32;
-
-      work |= work32;
-
-      work >>= 1;
-      work32 = work >> 32;
-
-      guess32 = work;        /* The last 32 bits (of 96) */
-      guess32 = -guess32;
-      if ( guess32 )
-	guess --;
-      guess -= work32;       /* The first 64 bits */
-      /* The guess should now be good to about 90 bits */
-    }
-
-  setexponent16(n, 0);
-
-  if ( guess32 >= (u32) -ERR_MARGIN )
-    {
-      /* Nearly exact, we round the 64 bit result upward. */
-      guess ++;
-    }
-  else if ( (guess32 > ERR_MARGIN) &&
-	   ((guess32 < 0x80000000-ERR_MARGIN)
-	    || (guess32 > 0x80000000+ERR_MARGIN)) )
-    {
-      /* We have enough accuracy to decide rounding */
-      significand(n) = guess;
-      return FPU_round(n, guess32, control_w, sign);
-    }
-
-  if ( (guess32 <= ERR_MARGIN) || (guess32 >= (u32) -ERR_MARGIN) )
-    {
-      /*
-       * This is an easy case because x^1/2 is monotonic.
-       * We need just find the square of our estimate, compare it
-       * with the argument, and deduce whether our estimate is
-       * above, below, or exact. We use the fact that the estimate
-       * is known to be accurate to about 90 bits.
-       */
-
-
-      /* We compute the lower 64 bits of the 128 bit product */
-      work32 = guess;
-      lowr = work32;
-      lowr *= work32;
-
-      uwork = guess >> 32;
-      work32 = guess;
-      uwork *= work32;
-      uwork <<= 33;   /* 33 = 32+1 (for two times the product) */
-
-      lowr += uwork;  /* We now have the 64 bits */
-
-      /* We need only look at bits 65..96 of the square of guess. */
-      if ( shifted )
-	work32 = lowr >> 31;
-      else
-	work32 = lowr >> 32;
-
-#ifdef PARANOID
-      if ( ((s32)work32 > 3*ERR_MARGIN) || ((s32)work32 < -3*ERR_MARGIN) )
-	{
-	  INTERNAL(0x214);
-	}
-#endif
-
-      significand(n) = guess;
-      if ( (s32)work32 > 0 )
-	{
-	  /* guess is too large */
-	  significand(n) --;
-	  return FPU_round(n, 0xffffff00, control_w, sign);
-	}
-      else if ( (s32)work32 < 0 )
-	{
-	  /* guess is a little too small */
-	  return FPU_round(n, 0x000000ff, control_w, sign);
-	}
-
-      else if ( (u32)lowr != 0 )
-       {
-         /* guess is too large */
-         significand(n) --;
-         return FPU_round(n, 0xffffff00, control_w, sign);
-       }
-
-      /* Our guess is precise. */
-      return FPU_round(n, 0, control_w, sign);
-    }
-
-  /* Very similar to the case above, but the last bit is near 0.5.
-     We handle this just like the case above but we shift everything
-     by one bit. */
-
-  uwork = guess;
-  uwork <<= 1;
-  uwork |= 1;      /* add the half bit */
-
-  /* We compute the lower 64 bits of the 128 bit product */
-  work32 = uwork;
-  lowr = work32;
-  lowr *= work32;
-
-  work32 = uwork >> 32;
-  uwork &= 0xffffffff;
-  uwork *= work32;
-  uwork <<= 33;   /* 33 = 32+1 (for two times the product) */
-
-  lowr += uwork;  /* We now have the 64 bits. The lowest 32 bits of lowr
-                    are not all zero (the lsb is 1). */
-
-  /* We need only look at bits 65..96 of the square of guess. */
-  if ( shifted )
-    work32 = lowr >> 31;
-  else
-    work32 = lowr >> 32;
-
-#ifdef PARANOID
-  if ( ((s32)work32 > 4*3*ERR_MARGIN) || ((s32)work32 < -4*3*ERR_MARGIN) )
-    {
-      INTERNAL(0x215);
-    }
-#endif
-
-  significand(n) = guess;
-  if ( (s32)work32 < 0 )
-    {
-      /* guess plus half bit is a little too small */
-      return FPU_round(n, 0x800000ff, control_w, sign);
-    }
-  else /* Note that the lower 64 bits of the product are not all zero */
-    {
-      /* guess plus half bit is too large */
-      return FPU_round(n, 0x7fffff00, control_w, sign);
-    }
-
-  /*
-    Note that the result of a square root cannot have precisely a half bit
-    of a least significant place (it is left as an exercise for the reader
-    to prove this! (hint: 65 bit*65 bit => n bits)).
-  */
-
-}
--- a/bochs/osdep.h
+++ b/bochs/osdep.h
@ -1,5 +1,5 @@
 /////////////////////////////////////////////////////////////////////////
-// $Id: osdep.h,v 1.20 2004-02-08 10:22:29 cbothamy Exp $
+// $Id: osdep.h,v 1.21 2004-06-18 14:11:05 sshwarts Exp $
 /////////////////////////////////////////////////////////////////////////
 //
 //  Copyright (C) 2001  MandrakeSoft S.A.
@ -68,9 +68,9 @@ extern "C" {
 #undef BX_HAVE_VSNPRINTF
 #define BX_HAVE_SNPRINTF 1
 #define BX_HAVE_VSNPRINTF 1
-#else    /* ifnndef __MINGW32__ */
+#else   /* ifndef __MINGW32__ */
 #define FMT_LL "%ll"
-#endif  /* ifnndef __MINGW32__ */
+#endif  /* ifndef __MINGW32__ */
 #else    /* WIN32 */
 #define FMT_LL "%ll"
 #endif   /* WIN32 */