freeze softfloat-fpu-ver3 branch and open it for free testing

I will continue the development in the new softfloat-fpu-ver4 branch. The current version already implements ALL FPU instructions except FSIN, FCOS, FSINCOS, FPTAN, FPATAN, FYL2XP1, F2XM1, FYL2X. I think it solves all currently reported bugs and feature requests related to the FPU code. Please write your own test programs and test the implementation in every way you can. Thanks, Stanislav
2004-04-09 12:29:50 +00:00 · 2004-04-09 12:29:50 +00:00 · 04124133c0
commit 04124133c0
parent 66f95e54c6
19 changed files with 9159 additions and 0 deletions
--- a/bochs/fpu/ferr.cc
+++ b/bochs/fpu/ferr.cc
@ -0,0 +1,86 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+
+#include "softfloat-specialize.h"
+
+void BX_CPU_C::FPU_stack_overflow(void)
+{
+  // Masked response: push a new top-of-stack slot and load it with the
+  // default NaN, tagged as Special.
+  const bool masked = BX_CPU_THIS_PTR the_i387.is_IA_masked();
+  if (masked) {
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+  }
+
+  // The overflow is reported through FPU_exception() in either case.
+  BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Overflow);
+}
+
+void BX_CPU_C::FPU_stack_underflow(int stnr, int pop_stack)
+{
+  // Masked response: register `stnr` receives the default NaN (tagged
+  // Special); the stack is popped afterwards when `pop_stack` is non-zero.
+  const bool masked = BX_CPU_THIS_PTR the_i387.is_IA_masked();
+  if (masked) {
+     BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, stnr);
+     if (pop_stack != 0) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+     }
+  }
+
+  // The underflow is reported through FPU_exception() in either case.
+  BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+}
+
+/*
+ * Records the given exception bits in the FPU status word.
+ * Returns 1 if an unmasked exception is pending afterwards, 0 otherwise.
+ */
+int BX_CPU_C::FPU_exception(int exception)
+{
+  /* Keep only the bits that belong in the status word */
+  const int raised = exception & (FPU_SW_Exceptions_Mask);
+
+  /* Latch the exception bits */
+  FPU_PARTIAL_STATUS |= raised;
+
+  /* Summary bits are set only when some latched exception is not masked
+     by the control word */
+  const int unmasked =
+      (FPU_PARTIAL_STATUS & ~FPU_CONTROL_WORD & FPU_CW_Exceptions_Mask) ? 1 : 0;
+  if (unmasked)
+  {
+      FPU_PARTIAL_STATUS |= (FPU_SW_Summary | FPU_SW_Backward);
+  }
+
+  /* C1 distinguishes over- from underflow for a stack fault, and roundup
+     from round-down for precision loss: clear it unless the caller asked
+     for it to be set. */
+  if ((raised & (FPU_SW_Stack_Fault | FPU_EX_Precision)) && !(raised & FPU_SW_C1))
+  {
+      FPU_PARTIAL_STATUS &= ~FPU_SW_C1;
+  }
+
+  return unmasked;
+}
+
+#endif
--- a/bochs/fpu/fprem.cc
+++ b/bochs/fpu/fprem.cc
@ -0,0 +1,169 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+#include "softfloat-macros.h"
+
+/*
+ * Executes a single exponent reduction cycle.
+ *
+ * The 128-bit value 0:aSig0 is shifted left by expDiff bits and divided by
+ * bSig.  The quotient estimate from estimateDiv128To64() is decremented
+ * until the 128-bit remainder is no longer negative.
+ *
+ * Returns the corrected quotient.  The low 64 bits of the remainder are
+ * stored in *zSig0 and the high 64 bits in *zSig1.
+ */
+static Bit64u remainder_kernel(Bit64u aSig0, Bit64u bSig, int expDiff, Bit64u *zSig0, Bit64u *zSig1)
+{
+    Bit64u term0, term1, rem0, rem1;
+    Bit64u aSig1 = 0;
+
+    shortShift128Left(aSig1, aSig0, expDiff, &aSig1, &aSig0);
+    Bit64u q = estimateDiv128To64(aSig1, aSig0, bSig);
+    mul64To128(bSig, q, &term0, &term1);
+    sub128(aSig1, aSig0, term0, term1, &rem0, &rem1);
+    while ((Bit64s) rem0 < 0) {
+        --q;
+        add128(rem0, rem1, 0, bSig, &rem0, &rem1);
+    }
+
+    /* rem0:rem1 already equals (aSig1:aSig0) - bSig * q modulo 2^128, so
+       there is no need to multiply and subtract a second time. */
+    *zSig0 = rem1;
+    *zSig1 = rem0;
+    return q;
+}
+/*
+ * Common worker behind floatx80_remainder() and floatx80_ieee754_remainder().
+ *
+ * a, b           dividend and divisor.
+ * q              output: reset to 0 on entry; set to (Bit64u) -1 when the
+ *                exponent difference is 64 or more (only a partial
+ *                reduction is done on that path); otherwise the integer
+ *                quotient, incremented once if the nearest-even step
+ *                adjusts the result.
+ * rounding_mode  only compared against float_round_nearest_even; any other
+ *                value leaves the truncated quotient and remainder as is.
+ * status         receives the invalid / denormal flags raised here.
+ *
+ * Returns the default NaN (with the invalid flag) for unsupported
+ * encodings, an infinite `a' or a zero `b'; a propagated NaN when an
+ * operand is a NaN; `a' itself when `b' is infinite or `a' is zero;
+ * otherwise the normalized, rounded and packed remainder.
+ */
+static floatx80 do_fprem(floatx80 a, floatx80 b, Bit64u &q, int rounding_mode, float_status_t &status)
+{
+    Bit32s aExp, bExp, zExp, expDiff;
+    Bit64u aSig0, aSig1, bSig;
+    int aSign;
+    q = 0;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+
+    if (aExp == 0x7FFF) {    // a is a NaN or an infinity
+        if ((Bit64u) (aSig0<<1)
+             || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        goto invalid;    // a is infinite and b is not a NaN
+    }
+    if (bExp == 0x7FFF) {    // b is a NaN or an infinity
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        return a;    // b is infinite: a is returned unchanged
+    }
+    if (bExp == 0) {
+        if (bSig == 0) {    // b is zero
+ invalid:
+            float_raise(status, float_flag_invalid);
+            return floatx80_default_nan;
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) {
+        if ((Bit64u) (aSig0<<1) == 0) return a;    // no fraction bits below the integer bit
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+    }
+    expDiff = aExp - bExp;
+    aSig1 = 0;
+
+    if (expDiff >= 64) {
+        int n = (expDiff & 0x1f) | 0x20;    // reduce by 32..63 bits only
+        remainder_kernel(aSig0, bSig, n, &aSig0, &aSig1);
+        zExp = aExp - n;
+        q = (Bit64u) -1;    // NOTE(review): presumably a marker for an incomplete reduction; confirm against the callers
+    }
+    else {
+        zExp = bExp;
+
+        if (expDiff < 0) {
+            if (expDiff < -1)    // a is returned; repacked when its integer bit is set
+                return (a.fraction & BX_CONST64(0x8000000000000000)) ? 
+                    packFloatx80(aSign, aExp, aSig0) : a;
+            shift128Right(aSig0, 0, 1, &aSig0, &aSig1);    // expDiff == -1: align a with b's exponent
+            expDiff = 0;
+        }
+
+        if (expDiff > 0) {
+            q = remainder_kernel(aSig0, bSig, expDiff, &aSig0, &aSig1);
+        }
+        else {
+            q = (bSig <= aSig0);    // equal exponents: the quotient is 0 or 1
+            if (q) 
+              aSig0 -= bSig;
+        }
+
+        if (rounding_mode == float_round_nearest_even)
+        {
+            Bit64u term0, term1;
+            shift128Right(bSig, 0, 1, &term0, &term1);    // term0:term1 = bSig / 2
+
+            if (! lt128(aSig0, aSig1, term0, term1))    // remainder >= bSig / 2
+            {
+                int lt = lt128(term0, term1, aSig0, aSig1);    // remainder >  bSig / 2
+                int eq = eq128(aSig0, aSig1, term0, term1);    // remainder == bSig / 2
+                
+		if ((eq && (q & 1)) || lt) {    // an exact tie adjusts only an odd quotient
+		    aSign = !aSign;
+		    ++q;
+		}
+		if (lt) sub128(bSig, 0, aSig0, aSig1, &aSig0, &aSig1);    // remainder = bSig - remainder
+            }
+        }
+    }
+
+    return normalizeRoundAndPackFloatx80(80, aSign, zExp, aSig0, aSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Computes the remainder of the extended double-precision floating-point
+| value `a' with respect to `b', asking do_fprem() for the nearest-even
+| adjustment of the quotient (the remainder of the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic).  The quotient reported by do_fprem()
+| is returned through `q'.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_ieee754_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status)
+{
+    const int mode = float_round_nearest_even;
+    floatx80 remainder = do_fprem(a, b, q, mode, status);
+    return remainder;
+}
+
+/*----------------------------------------------------------------------------
+| Computes the remainder of the extended double-precision floating-point
+| value `a' with respect to `b'.  Unlike floatx80_ieee754_remainder(), this
+| is not the remainder specified in the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic: do_fprem() is called with float_round_to_zero,
+| so the nearest-even adjustment of the quotient of `a' divided by `b' is
+| skipped.  The quotient reported by do_fprem() is returned through `q'.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status)
+{
+    const int mode = float_round_to_zero;
+    floatx80 remainder = do_fprem(a, b, q, mode, status);
+    return remainder;
+}
--- a/bochs/fpu/fpu_arith.cc
+++ b/bochs/fpu/fpu_arith.cc
--- a/bochs/fpu/fpu_compare.cc
+++ b/bochs/fpu/fpu_compare.cc
@ -0,0 +1,798 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+
+#include "softfloatx80.h"
+
+// Maps a softfloat comparison result onto the status word condition bits:
+//   unordered -> C0|C2|C3,  greater -> none,  less -> C0,  equal -> C3.
+// Any other value yields -1.
+static int status_word_flags_fpu_compare(int float_relation)
+{
+  if (float_relation == float_relation_unordered) {
+      return (FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+  }
+  if (float_relation == float_relation_greater) {
+      return (0);
+  }
+  if (float_relation == float_relation_less) {
+      return (FPU_SW_C0);
+  }
+  if (float_relation == float_relation_equal) {
+      return (FPU_SW_C3);
+  }
+
+  return (-1);	// should never get here
+}
+#endif
+
+#if BX_SUPPORT_FPU || BX_SUPPORT_SSE >= 1
+// Reflects a softfloat comparison result in EFLAGS:
+//   unordered -> ZF|PF|CF,  greater -> none,  less -> CF,  equal -> ZF.
+// Any other value triggers a panic and leaves EFLAGS untouched.
+void BX_CPU_C::write_eflags_fpu_compare(int float_relation)
+{
+  if (float_relation == float_relation_unordered) {
+      setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
+  }
+  else if (float_relation == float_relation_greater) {
+      setEFlagsOSZAPC(0);
+  }
+  else if (float_relation == float_relation_less) {
+      setEFlagsOSZAPC(EFlagsCFMask);
+  }
+  else if (float_relation == float_relation_equal) {
+      setEFlagsOSZAPC(EFlagsZFMask);
+  }
+  else {
+      BX_PANIC(("write_eflags: unknown floating point compare relation"));
+  }
+}
+#endif
+
+// FCOM / FCOMP ST(i): compares ST(0) with ST(i) using floatx80_compare()
+// and stores the outcome in the status word condition codes.
+void BX_CPU_C::FCOM_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // The low bit of nnn() selects the popping form.
+  const int do_pop = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      // An operand is empty: report stack underflow; the masked response
+      // sets the "unordered" condition codes.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const int relation =
+      floatx80_compare(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FCOM(P)_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+// FCOMI / FCOMIP ST(0), ST(j): compares ST(0) with ST(j) using
+// floatx80_compare() and stores the outcome in EFLAGS.
+void BX_CPU_C::FCOMI_ST0_STj(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // Bit 2 of b1() selects the popping form.
+  const int do_pop = i->b1() & 4;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      // An operand is empty: report stack underflow; the masked response
+      // sets the "unordered" EFLAGS pattern.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const int relation =
+      floatx80_compare(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), sf_status);
+
+  // On an unmasked exception neither EFLAGS nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      BX_CPU_THIS_PTR write_eflags_fpu_compare(relation);
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FCOMI(P)_ST0_STj: required FPU, configure --enable-fpu"));
+#endif
+}
+
+// FUCOMI / FUCOMIP ST(0), ST(j): compares ST(0) with ST(j) using
+// floatx80_compare_quiet() and stores the outcome in EFLAGS.
+void BX_CPU_C::FUCOMI_ST0_STj(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // Bit 2 of b1() selects the popping form.
+  const int do_pop = i->b1() & 4;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      // An operand is empty: report stack underflow; the masked response
+      // sets the "unordered" EFLAGS pattern.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          setEFlagsOSZAPC(EFlagsZFMask | EFlagsPFMask | EFlagsCFMask);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const int relation =
+      floatx80_compare_quiet(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), sf_status);
+
+  // On an unmasked exception neither EFLAGS nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      BX_CPU_THIS_PTR write_eflags_fpu_compare(relation);
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FUCOMI(P)_ST0_STj: required FPU, configure --enable-fpu"));
+#endif
+}
+
+// FUCOM / FUCOMP ST(i): compares ST(0) with ST(i) using
+// floatx80_compare_quiet() and stores the outcome in the status word
+// condition codes.
+void BX_CPU_C::FUCOM_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // The low bit of nnn() selects the popping form.
+  const int do_pop = i->nnn() & 1;
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(i->rm()))
+  {
+      // An operand is empty: report stack underflow; the masked response
+      // sets the "unordered" condition codes.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const int relation =
+      floatx80_compare_quiet(BX_READ_FPU_REG(0), BX_READ_FPU_REG(i->rm()), sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FUCOM(P)_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+// FCOM / FCOMP m32fp: compares ST(0) with a float32 read from memory and
+// stores the outcome in the status word condition codes.
+void BX_CPU_C::FCOM_SINGLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // The low bit of nnn() selects the popping form.
+  const int do_pop = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      // ST(0) is empty: report stack underflow; the masked response sets
+      // the "unordered" condition codes.  Memory is not read on this path.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  float32 mem_operand;
+  read_virtual_dword(i->seg(), RMAddr(i), &mem_operand);
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  // Widen the memory operand to extended precision before comparing.
+  const floatx80 rhs = float32_to_floatx80(mem_operand, sf_status);
+  const int relation = floatx80_compare(BX_READ_FPU_REG(0), rhs, sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FCOM(P)_SINGLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+// FCOM / FCOMP m64fp: compares ST(0) with a float64 read from memory and
+// stores the outcome in the status word condition codes.
+void BX_CPU_C::FCOM_DOUBLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // The low bit of nnn() selects the popping form.
+  const int do_pop = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      // ST(0) is empty: report stack underflow; the masked response sets
+      // the "unordered" condition codes.  Memory is not read on this path.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  float64 mem_operand;
+  read_virtual_qword(i->seg(), RMAddr(i), &mem_operand);
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  // Widen the memory operand to extended precision before comparing.
+  const floatx80 rhs = float64_to_floatx80(mem_operand, sf_status);
+  const int relation = floatx80_compare(BX_READ_FPU_REG(0), rhs, sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FCOM(P)_DOUBLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+// FICOM / FICOMP m16int: compares ST(0) with a signed 16-bit integer read
+// from memory and stores the outcome in the status word condition codes.
+void BX_CPU_C::FICOM_WORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // The low bit of nnn() selects the popping form.
+  const int do_pop = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      // ST(0) is empty: report stack underflow; the masked response sets
+      // the "unordered" condition codes.  Memory is not read on this path.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  Bit16s mem_operand;
+  read_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&mem_operand));
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  // Sign-extend to 32 bits, then convert to extended precision.
+  const floatx80 rhs = int32_to_floatx80((Bit32s)(mem_operand));
+  const int relation = floatx80_compare(BX_READ_FPU_REG(0), rhs, sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FICOM(P)_WORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+// FICOM / FICOMP m32int: compares ST(0) with a signed 32-bit integer read
+// from memory and stores the outcome in the status word condition codes.
+void BX_CPU_C::FICOM_DWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // The low bit of nnn() selects the popping form.
+  const int do_pop = i->nnn() & 1;
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      // ST(0) is empty: report stack underflow; the masked response sets
+      // the "unordered" condition codes.  Memory is not read on this path.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+          if (do_pop) {
+              BX_CPU_THIS_PTR the_i387.FPU_pop();
+          }
+      }
+      return;
+  }
+
+  Bit32s mem_operand;
+  read_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&mem_operand));
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const floatx80 rhs = int32_to_floatx80(mem_operand);
+  const int relation = floatx80_compare(BX_READ_FPU_REG(0), rhs, sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      if (do_pop) {
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+  }
+#else
+  BX_INFO(("FICOM(P)_DWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DE D9 */
+// FCOMPP: compares ST(0) with ST(1) using floatx80_compare(), stores the
+// outcome in the status word condition codes and pops the stack twice.
+void BX_CPU_C::FCOMPP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+      // An operand is empty: report stack underflow; the masked response
+      // sets the "unordered" condition codes and still pops twice.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const int relation =
+      floatx80_compare(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+  }
+#else
+  BX_INFO(("FCOMPP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DA E9 */
+// FUCOMPP: compares ST(0) with ST(1) using floatx80_compare_quiet(), stores
+// the outcome in the status word condition codes and pops the stack twice.
+void BX_CPU_C::FUCOMPP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+      // An operand is empty: report stack underflow; the masked response
+      // sets the "unordered" condition codes and still pops twice.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+          BX_CPU_THIS_PTR the_i387.FPU_pop();
+      }
+      return;
+  }
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const int relation =
+      floatx80_compare_quiet(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), sf_status);
+
+  // On an unmasked exception neither the condition codes nor the stack change.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+      BX_CPU_THIS_PTR the_i387.FPU_pop();
+  }
+#else
+  BX_INFO(("FUCOMPP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DA C0 */
+// FCMOVB ST(0), ST(j): copies ST(j) and its tag into ST(0) when CF is set.
+void BX_CPU_C::FCMOVB_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (get_CF()) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVB_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DA C8 */
+// FCMOVE ST(0), ST(j): copies ST(j) and its tag into ST(0) when ZF is set.
+void BX_CPU_C::FCMOVE_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (get_ZF()) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVE_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DA D0 */
+// FCMOVBE ST(0), ST(j): copies ST(j) and its tag into ST(0) when CF or ZF
+// is set.
+void BX_CPU_C::FCMOVBE_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (get_CF() || get_ZF()) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVBE_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DA D8 */
+// FCMOVU ST(0), ST(j): copies ST(j) and its tag into ST(0) when PF is set.
+void BX_CPU_C::FCMOVU_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (get_PF()) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVU_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DB C0 */
+// FCMOVNB ST(0), ST(j): copies ST(j) and its tag into ST(0) when CF is clear.
+void BX_CPU_C::FCMOVNB_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (! get_CF()) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVNB_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DB C8 */
+// FCMOVNE ST(0), ST(j): copies ST(j) and its tag into ST(0) when ZF is clear.
+void BX_CPU_C::FCMOVNE_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (! get_ZF()) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVNE_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DB D0 */
+// FCMOVNBE ST(0), ST(j): copies ST(j) and its tag into ST(0) when both CF
+// and ZF are clear.
+void BX_CPU_C::FCMOVNBE_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (! (get_CF() || get_ZF())) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVNBE_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DB D8 */
+// FCMOVNU ST(0), ST(j): copies ST(j) and its tag into ST(0) when PF is clear.
+void BX_CPU_C::FCMOVNU_ST0_STj(bxInstruction_c *i)
+{
+#if (BX_CPU_LEVEL >= 6) || (BX_CPU_LEVEL_HACKED >= 6)
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  const int dst_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  const int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  if (dst_tag == FPU_Tag_Empty || src_tag == FPU_Tag_Empty)
+  {
+     // Either register is empty: stack underflow on ST(0), no move.
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+     floatx80 src_value = BX_READ_FPU_REG(i->rm());
+
+     if (! get_PF()) {
+        BX_WRITE_FPU_REGISTER_AND_TAG(src_value, src_tag, 0);
+     }
+  }
+
+#else
+  BX_INFO(("FCMOVNU_ST0_STj: required P6 FPU, configure --enable-fpu, cpu-level=6"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* D9 E4 */
+// FTST: compares ST(0) with Const_Z using floatx80_compare() and stores the
+// outcome in the status word condition codes.
+void BX_CPU_C::FTST(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+      // ST(0) is empty: report stack underflow; the masked response sets
+      // the "unordered" condition codes.
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          SETCC(FPU_SW_C0|FPU_SW_C2|FPU_SW_C3);
+      }
+      return;
+  }
+
+  softfloat_status_word_t sf_status =
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  const int relation = floatx80_compare(BX_READ_FPU_REG(0), Const_Z, sf_status);
+
+  // On an unmasked exception the condition codes are left alone.
+  if (! BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+  {
+      SETCC(status_word_flags_fpu_compare(relation));
+  }
+#else
+  BX_INFO(("FTST: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 E5 */
+// FXAM: classifies the contents of ST(0) and reports the class through the
+// status word condition codes C0, C2 and C3, with C1 holding the sign.
+void BX_CPU_C::FXAM(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  floatx80 st0 = BX_READ_FPU_REG(0);
+
+  /*
+   * Every pattern below is written with C1 set; C1 is cleared again at the
+   * end when the sign bit of ST(0) is zero.
+   */
+  if (!(IS_TAG_EMPTY(0)))
+  {
+      switch (floatx80_class(st0))
+      {
+        case float_zero:
+           SETCC(FPU_SW_C3|FPU_SW_C1);
+           break;
+
+        case float_NaN:
+           // unsupported encodings are classified as NaNs, but get their
+           // own condition code pattern
+           SETCC((floatx80_is_unsupported(st0) ? (FPU_SW_C1)
+                                               : (FPU_SW_C1|FPU_SW_C0)));
+           break;
+
+        case float_negative_inf:
+        case float_positive_inf:
+           SETCC(FPU_SW_C2|FPU_SW_C1|FPU_SW_C0);
+           break;
+
+        case float_denormal:
+           SETCC(FPU_SW_C3|FPU_SW_C2|FPU_SW_C1);
+           break;
+
+        case float_normalized:
+           SETCC(FPU_SW_C2|FPU_SW_C1);
+           break;
+      }
+  }
+  else
+  {
+      /* empty register */
+      SETCC(FPU_SW_C3|FPU_SW_C1|FPU_SW_C0);
+  }
+
+  /*
+   * C1 reflects the sign of the value in ST(0), whether the register is
+   * empty or full.
+   */
+  if (! floatx80_sign(st0)) {
+    clear_C1();
+  }
+
+#else
+  BX_INFO(("FXAM: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/fpu_const.cc
+++ b/bochs/fpu/fpu_const.cc
@ -0,0 +1,204 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+
+#include "softfloatx80.h"
+
+/*
+ * Extended-precision constants, built as packFloatx80(sign, exponent,
+ * significand). Decoding the bit patterns below gives:
+ *   Const_QNaN - the default NaN        Const_Z   - +0.0
+ *   Const_1    - 1.0                    Const_L2T - log2(10)
+ *   Const_L2E  - log2(e)                Const_PI  - pi
+ *   Const_PI2  - pi/2                   Const_PI4 - pi/4
+ *   Const_LG2  - log10(2)               Const_LN2 - ln(2)
+ *   Const_INF  - +infinity
+ */
+const floatx80 Const_QNaN = packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+const floatx80 Const_Z    = packFloatx80(0, 0x0000, 0);
+const floatx80 Const_1    = packFloatx80(0, 0x3fff, BX_CONST64(0x8000000000000000));
+const floatx80 Const_L2T  = packFloatx80(0, 0x4000, BX_CONST64(0xd49a784bcd1b8afe));
+const floatx80 Const_L2E  = packFloatx80(0, 0x3fff, BX_CONST64(0xb8aa3b295c17f0bc));
+const floatx80 Const_PI   = packFloatx80(0, 0x4000, BX_CONST64(0xc90fdaa22168c235));
+const floatx80 Const_PI2  = packFloatx80(0, 0x3fff, BX_CONST64(0xc90fdaa22168c235));
+const floatx80 Const_PI4  = packFloatx80(0, 0x3ffe, BX_CONST64(0xc90fdaa22168c235));
+const floatx80 Const_LG2  = packFloatx80(0, 0x3ffd, BX_CONST64(0x9a209a84fbcff799));
+const floatx80 Const_LN2  = packFloatx80(0, 0x3ffe, BX_CONST64(0xb17217f7d1cf79ac));
+const floatx80 Const_INF  = packFloatx80(0, 0x7fff, BX_CONST64(0x8000000000000000));
+
+/* A fast way to find out whether the rounding control field of the
+   current FPU control word is one of RC_DOWN or RC_CHOP (and not one
+   of RC_RND or RC_UP). The macro takes no argument: it reads
+   FPU_CONTROL_WORD directly and yields a non-zero value when the
+   FPU_RC_DOWN bit is set inside the FPU_CW_RC field.
+   */
+#define DOWN_OR_CHOP()  (FPU_CONTROL_WORD & FPU_CW_RC & FPU_RC_DOWN)
+
+/*
+ * Return a copy of constant 'a' whose significand has been offset by
+ * 'adj' (callers in this file pass -1, 0 or +1); the sign and exponent
+ * are copied unchanged.
+ */
+BX_CPP_INLINE floatx80 FPU_round_const(const floatx80 &a, int adj)
+{
+  floatx80 adjusted = a;
+  adjusted.fraction = adjusted.fraction + adj;
+  return adjusted;
+}
+
+#endif
+
+/*
+ * FLDL2T: push Const_L2T onto the FPU register stack.
+ *
+ * C1 is cleared first. If the register the push would occupy
+ * (relative index -1) is not empty, FPU_stack_overflow() is invoked and
+ * nothing is pushed. Otherwise the constant is pushed with
+ * FPU_Tag_Valid; when the rounding control selects round-up the
+ * significand is bumped by one unit.
+ */
+void BX_CPU_C::FLDL2T(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+
+  // Only the rounding control field may be compared with FPU_RC_UP.
+  // Comparing the whole control word (which also carries the exception
+  // masks and precision control) would practically never match.
+  BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_L2T, 
+	((FPU_CONTROL_WORD & FPU_CW_RC) == FPU_RC_UP) ? 1 : 0), FPU_Tag_Valid, 0);
+#else
+  BX_INFO(("FLDL2T: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLDL2E: push Const_L2E onto the FPU register stack, or signal a
+ * stack overflow when the destination register is still occupied.
+ */
+void BX_CPU_C::FLDL2E(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+
+      // step the significand down by one when rounding down or chopping
+      int ulp_adjust = 0;
+      if (DOWN_OR_CHOP())
+          ulp_adjust = -1;
+
+      BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_L2E, ulp_adjust),
+          FPU_Tag_Valid, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLDL2E: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLDPI: push Const_PI onto the FPU register stack, or signal a stack
+ * overflow when the destination register is still occupied.
+ */
+void BX_CPU_C::FLDPI(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+
+      // step the significand down by one when rounding down or chopping
+      int ulp_adjust = 0;
+      if (DOWN_OR_CHOP())
+          ulp_adjust = -1;
+
+      BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_PI, ulp_adjust),
+          FPU_Tag_Valid, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLDPI: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLDLG2: push Const_LG2 onto the FPU register stack, or signal a
+ * stack overflow when the destination register is still occupied.
+ */
+void BX_CPU_C::FLDLG2(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+
+      // step the significand down by one when rounding down or chopping
+      int ulp_adjust = 0;
+      if (DOWN_OR_CHOP())
+          ulp_adjust = -1;
+
+      BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_LG2, ulp_adjust),
+          FPU_Tag_Valid, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLDLG2: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLDLN2: push Const_LN2 onto the FPU register stack, or signal a
+ * stack overflow when the destination register is still occupied.
+ */
+void BX_CPU_C::FLDLN2(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+
+      // step the significand down by one when rounding down or chopping
+      int ulp_adjust = 0;
+      if (DOWN_OR_CHOP())
+          ulp_adjust = -1;
+
+      BX_WRITE_FPU_REGISTER_AND_TAG(FPU_round_const(Const_LN2, ulp_adjust),
+          FPU_Tag_Valid, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLDLN2: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLD1: push Const_1 onto the FPU register stack with FPU_Tag_Valid,
+ * or signal a stack overflow when the destination register is occupied.
+ */
+void BX_CPU_C::FLD1(bxInstruction_c *i) 
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      // the constant is stored as-is, no rounding adjustment is applied
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REGISTER_AND_TAG(Const_1, FPU_Tag_Valid, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLD1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLDZ: push Const_Z onto the FPU register stack with FPU_Tag_Zero,
+ * or signal a stack overflow when the destination register is occupied.
+ */
+void BX_CPU_C::FLDZ(bxInstruction_c *i)                      
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      // the constant is stored as-is, no rounding adjustment is applied
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REGISTER_AND_TAG(Const_Z, FPU_Tag_Zero, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLDZ: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/fpu_load_store.cc
+++ b/bochs/fpu/fpu_load_store.cc
@ -0,0 +1,680 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+#include "softfloatx80.h"
+#endif
+
+/*
+ * FLD ST(i): push a copy of ST(i), together with its tag, onto the
+ * FPU register stack.
+ */
+void BX_CPU_C::FLD_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      // no room for the push
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+  else if (IS_TAG_EMPTY(i->rm()))
+  {
+      // NOTE(review): the source is empty; FPU_stack_underflow(0) is
+      // called without a push - confirm this is the intended response.
+      BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+  else
+  {
+      // capture the source before the push renumbers the stack
+      floatx80 src_val = BX_READ_FPU_REG(i->rm());
+      int src_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REGISTER_AND_TAG(src_val, src_tag, 0);
+  }
+#else
+  BX_INFO(("FLD_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLD m32fp: read a float32 from memory, convert it to floatx80 and
+ * push it onto the FPU register stack.
+ */
+void BX_CPU_C::FLD_SINGLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      float32 mem_val;
+      read_virtual_dword(i->seg(), RMAddr(i), &mem_val);
+
+      softfloat_status_word_t sf_status = 
+          FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      // widen to the internal floatx80 format
+      floatx80 widened = float32_to_floatx80(mem_val, sf_status);
+
+      // the push is skipped when FPU_exception returns non-zero
+      if (! (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags)))
+      {
+          BX_CPU_THIS_PTR the_i387.FPU_push();
+          BX_WRITE_FPU_REG(widened, 0);
+      }
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLD_SINGLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLD m64fp: read a float64 from memory, convert it to floatx80 and
+ * push it onto the FPU register stack.
+ */
+void BX_CPU_C::FLD_DOUBLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      float64 mem_val;
+      read_virtual_qword(i->seg(), RMAddr(i), &mem_val);
+
+      softfloat_status_word_t sf_status = 
+          FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      // widen to the internal floatx80 format
+      floatx80 widened = float64_to_floatx80(mem_val, sf_status);
+
+      // the push is skipped when FPU_exception returns non-zero
+      if (! (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags)))
+      {
+          BX_CPU_THIS_PTR the_i387.FPU_push();
+          BX_WRITE_FPU_REG(widened, 0);
+      }
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLD_DOUBLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FLD m80fp: read an extended-precision value from memory and push it
+ * onto the FPU register stack without any conversion.
+ */
+void BX_CPU_C::FLD_EXTENDED_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      // the operand is fetched before the stack is modified
+      floatx80 mem_val;
+      read_virtual_tword(i->seg(), RMAddr(i), &mem_val);
+
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REG(mem_val, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FLD_EXTENDED_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /0 */
+/*
+ * FILD m16int: read a signed 16-bit integer from memory, convert it to
+ * floatx80 and push it onto the FPU register stack.
+ */
+void BX_CPU_C::FILD_WORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      Bit16s mem_val;
+      read_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&mem_val));
+
+      // sign-extend to 32 bits for the converter
+      floatx80 converted = int32_to_floatx80((Bit32s) mem_val);
+
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REG(converted, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FILD_WORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DB /0 */
+/*
+ * FILD m32int: read a signed 32-bit integer from memory, convert it to
+ * floatx80 and push it onto the FPU register stack.
+ */
+void BX_CPU_C::FILD_DWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      Bit32s mem_val;
+      read_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&mem_val));
+
+      floatx80 converted = int32_to_floatx80(mem_val);
+
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REG(converted, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FILD_DWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /5 */
+/*
+ * FILD m64int: read a signed 64-bit integer from memory, convert it to
+ * floatx80 and push it onto the FPU register stack.
+ */
+void BX_CPU_C::FILD_QWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(-1))
+  {
+      Bit64s mem_val;
+      read_virtual_qword(i->seg(), RMAddr(i), (Bit64u*)(&mem_val));
+
+      floatx80 converted = int64_to_floatx80(mem_val);
+
+      BX_CPU_THIS_PTR the_i387.FPU_push();
+      BX_WRITE_FPU_REG(converted, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+  }
+#else
+  BX_INFO(("FILD_QWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /4 */
+/*
+ * FBLD m80bcd: read a 10-byte packed BCD operand from memory, convert
+ * it to floatx80 and push it onto the FPU register stack.
+ *
+ * The low 8 bytes carry 16 decimal digits (one per nibble, least
+ * significant first), the low byte of the upper word carries two more
+ * digits and bit 15 of the upper word carries the sign.
+ */
+void BX_CPU_C::FBLD_PACKED_BCD(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1))
+  {
+      BX_CPU_THIS_PTR FPU_stack_overflow();
+      return; 
+  }
+
+  Bit16u hi2;
+  Bit64u lo8;
+
+  // read packed bcd from memory
+  read_virtual_qword(i->seg(), RMAddr(i),     &lo8);
+  read_virtual_word (i->seg(), RMAddr(i) + 8, &hi2);
+
+  Bit64s scale = 1; 
+  Bit64s val64 = 0;
+
+  // The counter is deliberately not called 'i': that name belongs to
+  // the instruction parameter and must not be shadowed.
+  for (int digit = 0; digit < 16; digit++)
+  {
+    val64 += (lo8 & 0x0F) * scale;
+    lo8 >>= 4;
+    scale *= 10;
+  }
+
+  // digits 17 and 18 live in the low byte of the upper word
+  val64 += (hi2 & 0x0F) * scale;
+  val64 += ((hi2>>4) & 0x0F) * scale * 10;
+
+  floatx80 result = int64_to_floatx80(val64);
+  if (hi2 & 0x8000)	// set negative
+      // keep the returned value so the sign change takes effect whether
+      // floatx80_chs works in place or returns a negated copy
+      result = floatx80_chs(result);
+
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(result, 0);
+#else
+  BX_INFO(("FBLD_PACKED_BCD: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FST/FSTP ST(i): copy ST(0) and its tag into ST(i); the popping form
+ * (odd value of the nnn field) then pops the register stack.
+ *
+ * An empty ST(0) is handed to FPU_stack_underflow() together with the
+ * destination index and the pop request.
+ */
+void BX_CPU_C::FST_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int pop_stack = i->nnn() & 1;
+
+  // Start from a clean C1, as the other load/store handlers in this
+  // file do, so that a stale value does not survive this instruction.
+  clear_C1();
+
+  int st0_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  if (st0_tag == FPU_Tag_Empty)
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(i->rm(), pop_stack);
+     return;
+  }
+
+  floatx80 st0_reg = BX_READ_FPU_REG(0);
+
+  BX_WRITE_FPU_REGISTER_AND_TAG(st0_reg, st0_tag, i->rm());
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FST(P)_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FST/FSTP m32fp: convert ST(0) to float32 and store it to memory; the
+ * popping form (odd value of the nnn field) then pops the stack.
+ *
+ * With ST(0) empty a stack underflow is signalled and, if the invalid
+ * exception is masked, float32_default_nan is stored instead. Nothing
+ * is written or popped when FPU_exception returns non-zero for the
+ * conversion.
+ */
+void BX_CPU_C::FST_SINGLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  float32 save_reg = float32_default_nan; /* The masked response */
+
+  int pop_stack = i->nnn() & 1;
+
+  // Start from a clean C1, as the integer store handlers in this file
+  // do, so that a stale value does not survive this instruction.
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     softfloat_status_word_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_float32(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  write_virtual_dword(i->seg(), RMAddr(i), &save_reg);
+
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FST(P)_SINGLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FST/FSTP m64fp: convert ST(0) to float64 and store it to memory; the
+ * popping form (odd value of the nnn field) then pops the stack.
+ *
+ * With ST(0) empty a stack underflow is signalled and, if the invalid
+ * exception is masked, float64_default_nan is stored instead. Nothing
+ * is written or popped when FPU_exception returns non-zero for the
+ * conversion.
+ */
+void BX_CPU_C::FST_DOUBLE_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  float64 save_reg = float64_default_nan; /* The masked response */
+
+  int pop_stack = i->nnn() & 1;
+
+  // Start from a clean C1, as the integer store handlers in this file
+  // do, so that a stale value does not survive this instruction.
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     softfloat_status_word_t status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     save_reg = floatx80_to_float64(BX_READ_FPU_REG(0), status);
+
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  write_virtual_qword(i->seg(), RMAddr(i), &save_reg);
+
+  if (pop_stack)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FST(P)_DOUBLE_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DB /7 */
+/*
+ * FSTP m80fp: store ST(0) to memory unconverted and pop the stack.
+ *
+ * With ST(0) empty a stack underflow is signalled and, if the invalid
+ * exception is masked, floatx80_default_nan is stored instead.
+ */
+void BX_CPU_C::FSTP_EXTENDED_REAL(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  floatx80 save_reg = floatx80_default_nan; /* The masked response */
+
+  // Start from a clean C1, as the integer store handlers in this file
+  // do, so that a stale value does not survive this instruction.
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     save_reg = BX_READ_FPU_REG(0);
+  }
+
+  write_virtual_tword(i->seg(), RMAddr(i), &save_reg);
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FSTP_EXTENDED_REAL: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FIST/FISTP m16int: convert ST(0) to a signed 16-bit integer and store
+ * it to memory; the popping form (odd value of the nnn field) then pops
+ * the register stack.
+ */
+void BX_CPU_C::FIST_WORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // stored unchanged when ST(0) is empty and the exception is masked
+  Bit16s out_val = int16_indefinite;
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(0))
+  {
+      softfloat_status_word_t sf_status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      out_val = floatx80_to_int16(BX_READ_FPU_REG(0), sf_status);
+
+      // a non-zero return leaves memory and the stack untouched
+      if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+        return;
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+
+  write_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&out_val));
+
+  if (i->nnn() & 1)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FIST(P)_WORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FIST/FISTP m32int: convert ST(0) to a signed 32-bit integer and store
+ * it to memory; the popping form (odd value of the nnn field) then pops
+ * the register stack.
+ */
+void BX_CPU_C::FIST_DWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // stored unchanged when ST(0) is empty and the exception is masked
+  Bit32s out_val = int32_indefinite;
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(0))
+  {
+      softfloat_status_word_t sf_status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      out_val = floatx80_to_int32(BX_READ_FPU_REG(0), sf_status);
+
+      // a non-zero return leaves memory and the stack untouched
+      if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+        return;
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+
+  write_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&out_val));
+
+  if (i->nnn() & 1)
+     BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FIST(P)_DWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FISTP m64int: convert ST(0) to a signed 64-bit integer, store it to
+ * memory and pop the register stack.
+ */
+void BX_CPU_C::FISTP_QWORD_INTEGER(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // stored unchanged when ST(0) is empty and the exception is masked
+  Bit64s out_val = int64_indefinite;
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(0))
+  {
+      softfloat_status_word_t sf_status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      out_val = floatx80_to_int64(BX_READ_FPU_REG(0), sf_status);
+
+      // a non-zero return leaves memory and the stack untouched
+      if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+        return;
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+
+  write_virtual_qword(i->seg(), RMAddr(i), (Bit64u*)(&out_val));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTP_QWORD_INTEGER: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FBSTP m80bcd: convert ST(0) to an 18-digit packed BCD integer, store
+ * the 10-byte result to memory and pop the register stack.
+ *
+ * Layout written: 16 digits in the low quadword (one per nibble, least
+ * significant first), two more digits in the low byte of the upper
+ * word, and the sign in bit 15 of the upper word. Values whose
+ * magnitude exceeds 999999999999999999 raise the invalid flag.
+ */
+void BX_CPU_C::FBSTP_PACKED_BCD(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  /* 
+   * The packed BCD integer indefinite encoding (FFFFC000000000000000H) 
+   * is stored in response to a masked floating-point invalid-operation 
+   * exception.
+   */
+  Bit16u save_reg_hi = 0xFFFF;
+  Bit64u save_reg_lo = BX_CONST64(0xC000000000000000);
+
+  clear_C1();
+
+  if (IS_TAG_EMPTY(0))
+  {
+     BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+     if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+  else
+  {
+     softfloat_status_word_t status = 
+        FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+     Bit64s save_val = floatx80_to_int64(BX_READ_FPU_REG(0), status);
+
+     int sign = (save_val < 0);
+
+     // Take the magnitude in unsigned arithmetic. Negating the most
+     // negative 64-bit value as a signed number overflows, which let
+     // that value slip past the range check below and be encoded from
+     // a negative number.
+     Bit64u magnitude = (Bit64u) save_val;
+     if (sign) 
+        magnitude = 0 - magnitude;
+ 
+     if (magnitude > (Bit64u) BX_CONST64(999999999999999999))
+     {
+        float_raise(status, float_flag_invalid);
+     }
+     
+     if (! (status.float_exception_flags & float_flag_invalid))
+     {
+        save_reg_hi = (sign) ? 0x8000 : 0;
+        save_reg_lo = 0;
+
+        // The counter is deliberately not called 'i': that name belongs
+        // to the instruction parameter, which is used again below.
+        for (int digit = 0; digit < 16; digit++)
+        {
+           save_reg_lo += (magnitude % 10) << (4*digit);
+           magnitude /= 10;
+        }
+
+        // digits 17 and 18 go into the low byte of the upper word
+        save_reg_hi += (Bit16u)(magnitude % 10);
+        magnitude /= 10;
+        save_reg_hi += (Bit16u)((magnitude % 10) << 4);
+     }
+
+     /* check for fpu arithmetic exceptions */
+     if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+        return;
+  }
+
+  // write packed bcd to memory
+  write_virtual_qword(i->seg(), RMAddr(i),     &save_reg_lo);
+  write_virtual_word (i->seg(), RMAddr(i) + 8, &save_reg_hi);
+
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FBSTP_PACKED_BCD: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DF /1 */
+/*
+ * FISTTP m16int: convert ST(0) to a signed 16-bit integer with
+ * round-to-zero, store it to memory and pop the register stack.
+ * Without BX_SUPPORT_PNI the opcode is reported as undefined.
+ */
+void BX_CPU_C::FISTTP16(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // stored unchanged when ST(0) is empty and the exception is masked
+  Bit16s out_val = int16_indefinite;
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(0))
+  {
+      softfloat_status_word_t sf_status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      out_val = floatx80_to_int16_round_to_zero(BX_READ_FPU_REG(0), sf_status);
+
+      // a non-zero return leaves memory and the stack untouched
+      if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+        return;
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+
+  write_virtual_word(i->seg(), RMAddr(i), (Bit16u*)(&out_val));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTTP16: required PNI, configure --enable-pni"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DB /1 */
+/*
+ * FISTTP m32int: convert ST(0) to a signed 32-bit integer with
+ * round-to-zero, store it to memory and pop the register stack.
+ * Without BX_SUPPORT_PNI the opcode is reported as undefined.
+ */
+void BX_CPU_C::FISTTP32(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // stored unchanged when ST(0) is empty and the exception is masked
+  Bit32s out_val = int32_indefinite;
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(0))
+  {
+      softfloat_status_word_t sf_status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      out_val = floatx80_to_int32_round_to_zero(BX_READ_FPU_REG(0), sf_status);
+
+      // a non-zero return leaves memory and the stack untouched
+      if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+        return;
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+
+  write_virtual_dword(i->seg(), RMAddr(i), (Bit32u*)(&out_val));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTTP32: required PNI, configure --enable-pni"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/* DD /1 */
+/*
+ * FISTTP m64int: convert ST(0) to a signed 64-bit integer with
+ * round-to-zero, store it to memory and pop the register stack.
+ * Without BX_SUPPORT_PNI the opcode is reported as undefined.
+ */
+void BX_CPU_C::FISTTP64(bxInstruction_c *i)
+{
+#if BX_SUPPORT_PNI
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // stored unchanged when ST(0) is empty and the exception is masked
+  Bit64s out_val = int64_indefinite;
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(0))
+  {
+      softfloat_status_word_t sf_status = 
+         FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+      out_val = floatx80_to_int64_round_to_zero(BX_READ_FPU_REG(0), sf_status);
+
+      // a non-zero return leaves memory and the stack untouched
+      if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+        return;
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+        return;
+  }
+
+  write_virtual_qword(i->seg(), RMAddr(i), (Bit64u*)(&out_val));
+  BX_CPU_THIS_PTR the_i387.FPU_pop();
+#else
+  BX_INFO(("FISTTP64: required PNI, configure --enable-pni"));
+  UndefinedOpcode(i);
+#endif
+}
--- a/bochs/fpu/fpu_misc.cc
+++ b/bochs/fpu/fpu_misc.cc
@ -0,0 +1,160 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+
+#if BX_SUPPORT_FPU
+#include "softfloatx80.h"
+#endif
+
+/* D9 C8 */
+/*
+ * FXCH ST(i): exchange the contents and tags of ST(0) and ST(i).
+ * An empty operand signals a stack underflow; when that exception is
+ * masked the empty side is replaced by the default NaN before the swap.
+ */
+void BX_CPU_C::FXCH_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  // snapshot both operands up front
+  int top_tag   = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+  int other_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(i->rm());
+
+  floatx80 top_val   = BX_READ_FPU_REG(0);
+  floatx80 other_val = BX_READ_FPU_REG(i->rm());
+
+  clear_C1();
+
+  bool top_empty   = (top_tag == FPU_Tag_Empty);
+  bool other_empty = (other_tag == FPU_Tag_Empty);
+
+  if (top_empty || other_empty)
+  {
+      BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+
+      // unmasked: leave the registers alone
+      if (! (BX_CPU_THIS_PTR the_i387.is_IA_masked()))
+          return;
+
+      /* Masked response */
+      if (top_empty)
+      {
+          top_val = floatx80_default_nan;
+          top_tag = FPU_Tag_Special;
+      }
+      if (other_empty)
+      {
+          other_val = floatx80_default_nan;
+          other_tag = FPU_Tag_Special;
+      }
+  }
+
+  // write each value into the other's slot
+  BX_WRITE_FPU_REGISTER_AND_TAG(top_val, top_tag, i->rm());
+  BX_WRITE_FPU_REGISTER_AND_TAG(other_val, other_tag, 0);
+#else
+  BX_INFO(("FXCH_STi: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 E0 */
+/*
+ * FCHS: replace ST(0) with floatx80_chs(ST(0)), keeping its tag.
+ * An empty ST(0) is reported through FPU_stack_underflow(0).
+ */
+void BX_CPU_C::FCHS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int top_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+
+  if (top_tag != FPU_Tag_Empty)
+  {
+      clear_C1();
+
+      floatx80 top_val = BX_READ_FPU_REG(0);
+      BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_chs(top_val), top_tag, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+#else
+  BX_INFO(("FCHS: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 E1 */
+/*
+ * FABS: replace ST(0) with floatx80_abs(ST(0)), keeping its tag.
+ * An empty ST(0) is reported through FPU_stack_underflow(0).
+ */
+void BX_CPU_C::FABS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  int top_tag = BX_CPU_THIS_PTR the_i387.FPU_gettagi(0);
+
+  if (top_tag != FPU_Tag_Empty)
+  {
+      clear_C1();
+
+      floatx80 top_val = BX_READ_FPU_REG(0);
+      BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_abs(top_val), top_tag, 0);
+  }
+  else
+  {
+      BX_CPU_THIS_PTR FPU_stack_underflow(0);
+  }
+#else
+  BX_INFO(("FABS: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F6 */
+/*
+ * FDECSTP: clear C1 and step the top-of-stack index back by one,
+ * wrapping it into the range 0..7. Registers and tags are not touched.
+ */
+void BX_CPU_C::FDECSTP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  BX_CPU_THIS_PTR the_i387.tos -= 1;
+  BX_CPU_THIS_PTR the_i387.tos &= 7;
+#else
+  BX_INFO(("FDECSTP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F7 */
+/*
+ * FINCSTP: clear C1 and advance the top-of-stack index by one,
+ * wrapping it into the range 0..7. Registers and tags are not touched.
+ */
+void BX_CPU_C::FINCSTP(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  BX_CPU_THIS_PTR the_i387.tos += 1;
+  BX_CPU_THIS_PTR the_i387.tos &= 7;
+#else
+  BX_INFO(("FINCSTP: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* DD C0 */
+/*
+ * FFREE ST(i): mark ST(i) as empty by setting its tag to
+ * FPU_Tag_Empty. The register contents and the top-of-stack index are
+ * not modified here.
+ */
+void BX_CPU_C::FFREE_STi(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+  // only the tag changes
+  BX_CPU_THIS_PTR the_i387.FPU_settagi(FPU_Tag_Empty, i->rm());
+#else
+  BX_INFO(("FFREE_STi: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/fpu_trans.cc
+++ b/bochs/fpu/fpu_trans.cc
@ -0,0 +1,328 @@
+/////////////////////////////////////////////////////////////////////////
+//  Copyright (C) 2004  MandrakeSoft S.A.
+//
+//    MandrakeSoft S.A.
+//    43, rue d'Aboukir
+//    75002 Paris - France
+//    http://www.linux-mandrake.com/
+//    http://www.mandrakesoft.com/
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+/////////////////////////////////////////////////////////////////////////
+
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+#if BX_SUPPORT_FPU
+#include "softfloatx80.h"
+#endif
+
+/*
+ * Transcendental helpers with C linkage, defined outside this file.
+ * Each one receives a pointer to the ST(0) register and the tag of
+ * ST(0); the handlers below forward to them one-to-one.
+ * NOTE(review): these declarations use FPU_REG outside the
+ * BX_SUPPORT_FPU guard - confirm the type is visible in non-FPU builds.
+ */
+extern "C"
+{
+  void fsin   (FPU_REG *st0_ptr, Bit8u st0_tag);
+  void fcos   (FPU_REG *st0_ptr, Bit8u st0_tag);
+  void fsincos(FPU_REG *st0_ptr, Bit8u st0_tag);
+  void fptan  (FPU_REG *st0_ptr, Bit8u st0_tag);
+  void fpatan (FPU_REG *st0_ptr, Bit8u st0_tag);
+  void fyl2xp1(FPU_REG *st0_ptr, Bit8u st0_tag);
+  void f2xm1  (FPU_REG *st0_ptr, Bit8u st0_tag);
+  void fyl2x  (FPU_REG *st0_ptr, Bit8u st0_tag);
+}
+
+/*
+ * Called with the address of the CPU's i387 state before every helper
+ * above is invoked.
+ */
+extern void FPU_initalize_i387(struct i387_t *the_i387);
+
+/*
+ * FSIN: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external fsin() helper.
+ */
+void BX_CPU_C::FSIN(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  fsin(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("FSIN: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FCOS: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external fcos() helper.
+ */
+void BX_CPU_C::FCOS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  fcos(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("FCOS: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FSINCOS: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external fsincos() helper.
+ */
+void BX_CPU_C::FSINCOS(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  fsincos(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("FSINCOS: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FPTAN: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external fptan() helper.
+ */
+void BX_CPU_C::FPTAN(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  fptan(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("FPTAN: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FPATAN: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external fpatan() helper.
+ */
+void BX_CPU_C::FPATAN(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  fpatan(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("FPATAN: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FYL2XP1: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external fyl2xp1() helper.
+ */
+void BX_CPU_C::FYL2XP1(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  fyl2xp1(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("FYL2XP1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * F2XM1: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external f2xm1() helper.
+ */
+void BX_CPU_C::F2XM1(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  f2xm1(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("F2XM1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/*
+ * FYL2X: clear C1, hand the i387 state to FPU_initalize_i387() and
+ * delegate the computation to the external fyl2x() helper.
+ */
+void BX_CPU_C::FYL2X(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  i387_t *fpu_state = (i387_t *)(&(BX_CPU_THIS_PTR the_i387));
+  FPU_initalize_i387(fpu_state);
+
+  // the helper works on ST(0) through a pointer
+  FPU_REG *st0 = &(BX_FPU_READ_ST0());
+  fyl2x(st0, BX_CPU_THIS_PTR the_i387.FPU_gettagi(0));
+#else
+  BX_INFO(("FYL2X: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F4 */
+/*
+ * FXTRACT: split ST(0) into exponent and significand. The exponent
+ * (returned by floatx80_extract) replaces ST(0), then the remaining
+ * significand is pushed so that it becomes the new ST(0).
+ *
+ * The instruction both reads ST(0) and pushes, so two stack faults are
+ * possible: an underflow when ST(0) is empty, and an overflow when the
+ * register the push would occupy is not empty. With the invalid
+ * exception masked, both the old and the new top are set to the
+ * default NaN.
+ */
+void BX_CPU_C::FXTRACT(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  if (! IS_TAG_EMPTY(-1) || IS_TAG_EMPTY(0))
+  {
+      // Report the fault that actually happened: a missing source
+      // operand is an underflow, not an overflow.
+      if (IS_TAG_EMPTY(0))
+          BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Underflow);
+      else
+          BX_CPU_THIS_PTR FPU_exception(FPU_EX_Stack_Overflow);
+
+      /* The masked response */
+      if (BX_CPU_THIS_PTR the_i387.is_IA_masked())
+      {
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+          BX_CPU_THIS_PTR the_i387.FPU_push();
+          BX_WRITE_FPU_REGISTER_AND_TAG(floatx80_default_nan, FPU_Tag_Special, 0);
+      }
+
+      return; 
+  }
+
+  softfloat_status_word_t status = 
+      FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  // floatx80_extract returns the exponent; 'a' is written back as the
+  // fraction below
+  floatx80 a = BX_READ_FPU_REG(0);
+  floatx80 b = floatx80_extract(a, status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(status.float_exception_flags))
+      return;
+
+  BX_WRITE_FPU_REG(b, 0);	// exponent
+  BX_CPU_THIS_PTR the_i387.FPU_push();
+  BX_WRITE_FPU_REG(a, 0);	// fraction
+#else
+  BX_INFO(("FXTRACT: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F5 */
+/*
+ * FPREM1: replace ST(0) with floatx80_ieee754_remainder(ST(0), ST(1))
+ * and report the quotient through the condition code flags.
+ */
+void BX_CPU_C::FPREM1(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  // both operands must be present
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  softfloat_status_word_t sf_status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  floatx80 dividend = BX_READ_FPU_REG(0);
+  floatx80 divisor  = BX_READ_FPU_REG(1);
+
+  Bit64u q;
+  floatx80 rem = floatx80_ieee754_remainder(dividend, divisor, q, sf_status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+      return;
+
+  int flags = 0;
+  if (q != (Bit64u) -1)
+  {
+      // the three low quotient bits map to C1, C3 and C0
+      if (q & 1) flags |= FPU_SW_C1;
+      if (q & 2) flags |= FPU_SW_C3;
+      if (q & 4) flags |= FPU_SW_C0;
+  }
+  else
+  {
+      // an all-ones quotient is reported as C2 alone
+      flags = FPU_SW_C2;
+  }
+  SETCC(flags);
+
+  BX_WRITE_FPU_REG(rem, 0);
+#else
+  BX_INFO(("FPREM1: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 F8 */
+/*
+ * FPREM: replace ST(0) with floatx80_remainder(ST(0), ST(1)) and
+ * report the quotient through the condition code flags.
+ */
+void BX_CPU_C::FPREM(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  // both operands must be present
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  softfloat_status_word_t sf_status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  floatx80 dividend = BX_READ_FPU_REG(0);
+  floatx80 divisor  = BX_READ_FPU_REG(1);
+
+  Bit64u q;
+  floatx80 rem = floatx80_remainder(dividend, divisor, q, sf_status);
+
+  if (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags))
+      return;
+
+  int flags = 0;
+  if (q != (Bit64u) -1)
+  {
+      // the three low quotient bits map to C1, C3 and C0
+      if (q & 1) flags |= FPU_SW_C1;
+      if (q & 2) flags |= FPU_SW_C3;
+      if (q & 4) flags |= FPU_SW_C0;
+  }
+  else
+  {
+      // an all-ones quotient is reported as C2 alone
+      flags = FPU_SW_C2;
+  }
+  SETCC(flags);
+
+  BX_WRITE_FPU_REG(rem, 0);
+#else
+  BX_INFO(("FPREM: required FPU, configure --enable-fpu"));
+#endif
+}
+
+/* D9 FD */
+/*
+ * FSCALE: replace ST(0) with floatx80_scale(ST(0), ST(1)).
+ * ST(1) is left unchanged.
+ */
+void BX_CPU_C::FSCALE(bxInstruction_c *i)
+{
+#if BX_SUPPORT_FPU
+  BX_CPU_THIS_PTR prepareFPU(i);
+
+  clear_C1();
+
+  // both operands must be present
+  if (IS_TAG_EMPTY(0) || IS_TAG_EMPTY(1))
+  {
+     BX_CPU_THIS_PTR FPU_stack_underflow(0);
+     return;
+  }
+
+  softfloat_status_word_t sf_status = 
+	FPU_pre_exception_handling(BX_CPU_THIS_PTR the_i387.get_control_word());
+
+  floatx80 scaled = floatx80_scale(BX_READ_FPU_REG(0), BX_READ_FPU_REG(1), sf_status);
+
+  // ST(0) is only overwritten when FPU_exception returns zero
+  if (! (BX_CPU_THIS_PTR FPU_exception(sf_status.float_exception_flags)))
+  {
+      BX_WRITE_FPU_REG(scaled, 0);
+  }
+#else
+  BX_INFO(("FSCALE: required FPU, configure --enable-fpu"));
+#endif
+}
--- a/bochs/fpu/linkage.h
+++ b/bochs/fpu/linkage.h
@ -0,0 +1,16 @@
+#ifndef _LINUX_LINKAGE_H
+#define _LINUX_LINKAGE_H
+
+/* CPP_ASMLINKAGE expands to `extern "C"' for C++ translation units and to
+   nothing otherwise. */
+#if defined(__cplusplus)
+#  define CPP_ASMLINKAGE extern "C"
+#else
+#  define CPP_ASMLINKAGE
+#endif
+
+/* asmlinkage is CPP_ASMLINKAGE, plus a regparm(0) attribute (through
+   GCC_ATTRIBUTE, which is defined elsewhere) when targeting i386 and either
+   __GNUC__ > 2 or __GNUC_MINOR__ > 7. */
+#if defined(__i386__) && ((__GNUC__ > 2) || (__GNUC_MINOR__ > 7))
+#  define asmlinkage CPP_ASMLINKAGE GCC_ATTRIBUTE((regparm(0)))
+#else
+#  define asmlinkage CPP_ASMLINKAGE
+#endif
+
+#endif /* _LINUX_LINKAGE_H */
--- a/bochs/fpu/softfloat-macros.h
+++ b/bochs/fpu/softfloat-macros.h
@ -0,0 +1,586 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#ifndef SOFTFLOAT_MACROS_H
+#define SOFTFLOAT_MACROS_H
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shift32RightJamming(Bit32u a, Bit16s count, Bit32u *zPtr)
+{
+    /* A zero shift leaves the operand untouched. */
+    if (count == 0) {
+        *zPtr = a;
+        return;
+    }
+
+    /* Shifting by the full width or more keeps only the sticky bit. */
+    if (count >= 32) {
+        *zPtr = (a != 0);
+        return;
+    }
+
+    /* Moving `a' left by (32 - count) isolates exactly the bits that the
+       right shift discards; if any is set, bit 0 of the result is forced. */
+    const bool lostNonzero = (a<<((-count) & 31)) != 0;
+    *zPtr = (a>>count) | lostNonzero;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void shift64RightJamming(Bit64u a, Bit16s count, Bit64u *zPtr)
+{
+    /* A zero shift leaves the operand untouched. */
+    if (count == 0) {
+        *zPtr = a;
+        return;
+    }
+
+    /* Shifting by the full width or more keeps only the sticky bit. */
+    if (count >= 64) {
+        *zPtr = (a != 0);
+        return;
+    }
+
+    /* Moving `a' left by (64 - count) isolates exactly the bits that the
+       right shift discards; if any is set, bit 0 of the result is forced. */
+    const bool lostNonzero = (a<<((-count) & 63)) != 0;
+    *zPtr = (a>>count) | lostNonzero;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void
+ shift64ExtraRightJamming(
+     Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    Bit64u integerPart = 0;
+    Bit64u extra;
+
+    if (count == 0) {
+        /* No shift: both words pass through unchanged. */
+        extra = a1;
+        integerPart = a0;
+    }
+    else if (count < 64) {
+        /* Bits leaving a0 become the top of the extra word; anything that
+           was already in a1 collapses into its sticky bit. */
+        extra = (a0<<((-count) & 63)) | (a1 != 0);
+        integerPart = a0>>count;
+    }
+    else if (count == 64) {
+        /* a0 moves wholesale into the extra word. */
+        extra = a0 | (a1 != 0);
+    }
+    else {
+        /* Everything is shifted past the extra word: sticky bit only. */
+        extra = ((a0 | a1) != 0);
+    }
+
+    *z1Ptr = extra;
+    *z0Ptr = integerPart;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void
+ add128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    /* Add the low words first; the sum wrapped around exactly when it ends
+       up smaller than one of its operands. */
+    const Bit64u lowSum = a1 + b1;
+    const Bit64u carry = (lowSum < a1);
+
+    *z1Ptr = lowSum;
+    *z0Ptr = a0 + b0 + carry;
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void
+ sub128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    /* The low-word subtraction borrows from the high word when a1 < b1. */
+    const Bit64u borrow = (a1 < b1);
+
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - borrow;
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void mul64To128(Bit64u a, Bit64u b, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    /* Split both operands into 32-bit halves. */
+    const Bit32u aLo = a;
+    const Bit32u aHi = a>>32;
+    const Bit32u bLo = b;
+    const Bit32u bHi = b>>32;
+
+    /* Four 32x32 -> 64 partial products. */
+    Bit64u low = ((Bit64u) aLo) * bLo;
+    Bit64u cross = ((Bit64u) aLo) * bHi;
+    const Bit64u crossOther = ((Bit64u) aHi) * bLo;
+    Bit64u high = ((Bit64u) aHi) * bHi;
+
+    /* Combine the two cross terms; a wrap here is worth 2^32 in the high
+       word, and the upper half of the cross sum also belongs there. */
+    cross += crossOther;
+    high += (((Bit64u) (cross < crossOther))<<32) + (cross>>32);
+
+    /* The lower half of the cross sum is added into the top of the low
+       word, propagating any carry. */
+    cross <<= 32;
+    low += cross;
+    high += (low < cross);
+
+    *z1Ptr = low;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static Bit64u estimateDiv128To64(Bit64u a0, Bit64u a1, Bit64u b)
+{
+    /* The quotient would not fit in 64 bits: saturate. */
+    if (b <= a0) return BX_CONST64(0xFFFFFFFFFFFFFFFF);
+
+    const Bit64u bHigh = b>>32;
+    const Bit64u bLowShifted = b<<32;
+
+    /* First guess for the upper 32 quotient bits, from the top of the
+       divisor only. */
+    Bit64u estimate =
+        (bHigh<<32 <= a0) ? BX_CONST64(0xFFFFFFFF00000000) : (a0 / bHigh)<<32;
+
+    /* remainder = a - b * estimate; step the guess down while the
+       remainder is negative. */
+    Bit64u product0, product1;
+    Bit64u rem0, rem1;
+    mul64To128(b, estimate, &product0, &product1);
+    sub128(a0, a1, product0, product1, &rem0, &rem1);
+    while (((Bit64s) rem0) < 0) {
+        estimate -= BX_CONST64(0x100000000);
+        add128(rem0, rem1, bHigh, bLowShifted, &rem0, &rem1);
+    }
+
+    /* Lower 32 quotient bits, estimated from the remaining 64 significant
+       remainder bits. */
+    rem0 = (rem0<<32) | (rem1>>32);
+    estimate |= (bHigh<<32 <= rem0) ? 0xFFFFFFFF : rem0 / bHigh;
+    return estimate;
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+static Bit32u estimateSqrt32(Bit16s aExp, Bit32u a)
+{
+    /* Per-interval corrections for the initial linear guess, selected by
+       bits 27..30 of `a'. */
+    static const Bit16u sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const Bit16u sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+
+    const int slot = (a>>27) & 15;
+    const bool oddExponent = (aExp & 1) != 0;
+    Bit32u estimate;
+
+    if (oddExponent) {
+        estimate = 0x4000 + (a>>17) - sqrtOddAdjustments[slot];
+        estimate = ((a / estimate)<<14) + (estimate<<15);
+        a >>= 1;
+    }
+    else {
+        estimate = 0x8000 + (a>>17) - sqrtEvenAdjustments[slot];
+        estimate = a / estimate + estimate;
+        /* Saturate before scaling so the shift cannot overflow 32 bits. */
+        estimate = (0x20000 <= estimate) ? 0xFFFF8000 : (estimate<<15);
+        if (estimate <= a) return (Bit32u) (((Bit32s) a)>>1);
+    }
+
+    /* One final refinement step using a 64-bit division. */
+    return ((Bit32u) ((((Bit64u) a)<<31) / estimate)) + (estimate>>1);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int countLeadingZeros32(Bit32u a)
+{
+    /* Leading-zero count of every possible byte value. */
+    static const Bit8s countLeadingZerosHigh[] = {
+        8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+    };
+
+    int zeros = 0;
+
+    /* Upper 16 bits clear: count them and bring the lower half up. */
+    if (a < 0x10000) {
+        zeros = 16;
+        a <<= 16;
+    }
+    /* Top byte clear: count it and bring the next byte up. */
+    if (a < 0x1000000) {
+        zeros += 8;
+        a <<= 8;
+    }
+    /* The top byte now decides the rest through the table. */
+    return zeros + countLeadingZerosHigh[ a>>24 ];
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'.  If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int countLeadingZeros64(Bit64u a)
+{
+    /* Pick the most significant 32-bit word that can contain a set bit and
+       let the 32-bit routine finish the count. */
+    const bool highWordEmpty = (a>>32) == 0;
+    const Bit32u word = highWordEmpty ? (Bit32u) a : (Bit32u) (a>>32);
+
+    return (highWordEmpty ? 32 : 0) + countLeadingZeros32(word);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is 128 or
+| more, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' (high word) and
+| `z1Ptr' (low word).
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void
+ shift128Right(Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    Bit64u z0, z1;
+    Bit8s negCount = (-count) & 63;
+
+    if (count == 0) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if (count < 64) {
+        /* Bits leaving the high word enter the top of the low word. */
+        z1 = (a0<<negCount) | (a1>>count);
+        z0 = a0>>count;
+    }
+    else {
+        /* 64 <= count: the high word becomes zero and the low word receives
+           what is left of `a0'.  The bound must be 128 here; testing against
+           64 (always false in this branch) would discard `a0' for every
+           shift between 64 and 127. */
+        z1 = (count < 128) ? (a0>>(count & 63)) : 0;
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void
+ shift128RightJamming(
+     Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    const int wrap = (-count) & 63;
+    Bit64u high = 0;
+    Bit64u low;
+
+    if (count == 0) {
+        /* No shift at all. */
+        low = a1;
+        high = a0;
+    }
+    else if (count < 64) {
+        /* Bits dropped off the bottom of a1 are folded into bit 0. */
+        const bool lostNonzero = (a1<<wrap) != 0;
+        low = (a0<<wrap) | (a1>>count) | lostNonzero;
+        high = a0>>count;
+    }
+    else if (count == 64) {
+        /* a0 becomes the low word; all of a1 is sticky. */
+        low = a0 | (a1 != 0);
+    }
+    else if (count < 128) {
+        /* Part of a0 survives; the rest of a0 and all of a1 are sticky. */
+        low = (a0>>(count & 63)) | (((a0<<wrap) | a1) != 0);
+    }
+    else {
+        /* Nothing survives except the sticky bit. */
+        low = ((a0 | a1) != 0);
+    }
+
+    *z1Ptr = low;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void
+ shift128ExtraRightJamming(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u a2,
+     Bit16s count,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr
+)
+{
+    Bit64u high, mid, extra;
+
+    if (count == 0) {
+        /* No shift: the three words pass through and nothing is jammed. */
+        extra = a2;
+        mid = a1;
+        high = a0;
+    }
+    else {
+        const int wrap = (-count) & 63;
+        Bit64u sticky = a2;   /* everything that collapses into bit 0 */
+
+        high = 0;
+        if (count < 64) {
+            extra = a1<<wrap;
+            mid = (a0<<wrap) | (a1>>count);
+            high = a0>>count;
+        }
+        else if (count == 64) {
+            /* Whole-word move: a1 -> extra, a0 -> mid. */
+            extra = a1;
+            mid = a0;
+        }
+        else {
+            /* Beyond 64 bits, a1 is entirely below the extra word. */
+            sticky |= a1;
+            if (count < 128) {
+                extra = a0<<wrap;
+                mid = a0>>(count & 63);
+            }
+            else {
+                extra = (count == 128) ? a0 : (a0 != 0);
+                mid = 0;
+            }
+        }
+        extra |= (sticky != 0);
+    }
+
+    *z2Ptr = extra;
+    *z1Ptr = mid;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' must be less than 64.  The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void
+ shortShift128Left(
+     Bit64u a0, Bit64u a1, Bit16s count, Bit64u *z0Ptr, Bit64u *z1Ptr)
+{
+    const Bit64u low = a1<<count;
+    Bit64u high = a0;
+
+    /* Only a nonzero shift moves bits from the top of a1 into a0. */
+    if (count != 0) {
+        high = (a0<<count) | (a1>>((-count) & 63));
+    }
+
+    *z1Ptr = low;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void add192(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u a2,
+     Bit64u b0,
+     Bit64u b1,
+     Bit64u b2,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr
+)
+{
+    /* Least significant word and its carry out. */
+    const Bit64u low = a2 + b2;
+    const Bit64u carryFromLow = (low < a2);
+
+    /* Middle word: one carry can come from the addition itself and another
+       from absorbing the carry of the low word. */
+    Bit64u mid = a1 + b1;
+    const Bit64u carryFromMid = (mid < a1);
+    mid += carryFromLow;
+    const Bit64u carryFromBump = (mid < carryFromLow);
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = a0 + b0 + carryFromBump + carryFromMid;
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void sub192(
+     Bit64u a0,
+     Bit64u a1,
+     Bit64u a2,
+     Bit64u b0,
+     Bit64u b1,
+     Bit64u b2,
+     Bit64u *z0Ptr,
+     Bit64u *z1Ptr,
+     Bit64u *z2Ptr
+)
+{
+    /* Least significant word and the borrow it needs. */
+    const Bit64u low = a2 - b2;
+    const Bit64u borrowFromLow = (a2 < b2);
+
+    /* Middle word: it may borrow for its own subtraction and once more
+       when the borrow of the low word is taken out of it. */
+    Bit64u mid = a1 - b1;
+    const Bit64u borrowFromMid = (a1 < b1);
+
+    Bit64u high = a0 - b0;
+    high -= (mid < borrowFromLow);
+    mid -= borrowFromLow;
+    high -= borrowFromMid;
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int eq128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
+{
+    /* Different high words already settle it. */
+    if (a0 != b0) return 0;
+    return (a1 == b1);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int le128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
+{
+    /* The high words decide unless they are equal; then the low words do. */
+    if (a0 != b0) return (a0 < b0);
+    return (a1 <= b1);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int lt128(Bit64u a0, Bit64u a1, Bit64u b0, Bit64u b1)
+{
+    /* The high words decide unless they are equal; then the low words do. */
+    if (a0 != b0) return (a0 < b0);
+    return (a1 < b1);
+}
+
+#endif	/* FLOATX80 */
+
+#endif
--- a/bochs/fpu/softfloat-round-pack.cc
+++ b/bochs/fpu/softfloat-round-pack.cc
@ -0,0 +1,557 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloat.h"
+#include "softfloat-round-pack.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status)
+{
+    const int roundingMode = get_float_rounding_mode(status);
+    const int roundNearestEven = (roundingMode == float_round_nearest_even);
+
+    /* Amount added below the binary point (7 fraction bits) before
+       truncating: half an ulp for nearest-even, nothing when the mode rounds
+       this sign toward zero, otherwise just under one ulp. */
+    int roundIncrement;
+    if (roundNearestEven) {
+        roundIncrement = 0x40;
+    }
+    else if (roundingMode == float_round_to_zero) {
+        roundIncrement = 0;
+    }
+    else if (zSign) {
+        roundIncrement = (roundingMode == float_round_up) ? 0 : 0x7F;
+    }
+    else {
+        roundIncrement = (roundingMode == float_round_down) ? 0 : 0x7F;
+    }
+
+    const int roundBits = absZ & 0x7F;
+    absZ = (absZ + roundIncrement)>>7;
+    /* An exact tie in nearest-even mode is forced to the even integer. */
+    if (roundNearestEven && (roundBits == 0x40)) {
+        absZ &= ~((Bit64u) 1);
+    }
+
+    Bit32s z = absZ;
+    if (zSign) z = -z;
+
+    /* The magnitude does not fit in 32 bits, or the sign of the packed
+       value disagrees with the requested sign. */
+    const bool outOfRange = (absZ>>32) || (z && ((z < 0) ^ zSign));
+    if (outOfRange) {
+        float_raise(status, float_flag_invalid);
+        return (Bit32s)(int32_indefinite);
+    }
+
+    if (roundBits) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status)
+{
+    const int roundingMode = get_float_rounding_mode(status);
+    const int roundNearestEven = (roundingMode == float_round_nearest_even);
+
+    /* Decide whether the integer part must be bumped by one.  `absZ1' holds
+       the fraction: its top bit is the half-ulp position. */
+    int increment;
+    if (roundNearestEven) {
+        increment = ((Bit64s) absZ1 < 0);
+    }
+    else if (roundingMode == float_round_to_zero) {
+        increment = 0;
+    }
+    else if (zSign) {
+        increment = (roundingMode == float_round_down) && absZ1;
+    }
+    else {
+        increment = (roundingMode == float_round_up) && absZ1;
+    }
+
+    bool tooLarge = false;
+    if (increment) {
+        ++absZ0;
+        if (absZ0 == 0) {
+            /* The increment wrapped around 64 bits. */
+            tooLarge = true;
+        }
+        else if (roundNearestEven && ((Bit64u) (absZ1<<1) == 0)) {
+            /* Exact tie: fall back to the even integer. */
+            absZ0 &= ~((Bit64u) 1);
+        }
+    }
+
+    Bit64s z = 0;
+    if (! tooLarge) {
+        z = absZ0;
+        if (zSign) z = -z;
+        /* A nonzero result whose sign disagrees with `zSign' overflowed. */
+        tooLarge = (z && ((z < 0) ^ zSign));
+    }
+
+    if (tooLarge) {
+        float_raise(status, float_flag_invalid);
+        return (Bit64s)(int64_indefinite);
+    }
+
+    if (absZ1) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr)
+{
+    /* Shift needed so that the leading 1 ends up with exactly 8 zero bits
+       above it; the exponent drops by the same amount, starting from 1. */
+    const int leftShift = countLeadingZeros32(aSig) - 8;
+
+    *zSigPtr = aSig<<leftShift;
+    *zExpPtr = 1 - leftShift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
+{
+    /* Rounds `zSig' (which carries 7 extra low-order bits) according to the
+       rounding mode in `status', raises the applicable exception flags and
+       packs sign, exponent and significand with packFloat32(). */
+    Bit32s roundIncrement, roundBits, roundMask;
+
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    /* Defaults for round-to-nearest-even: add half of the 7-bit field. */
+    roundIncrement = 0x40;
+    roundMask = 0x7F;
+
+    if (! roundNearestEven) {
+        /* Truncation adds nothing; the directed modes add the whole mask
+           unless the direction points toward zero for this sign. */
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask;
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig & roundMask;
+    /* The unsigned cast makes negative exponents compare as large values, so
+       this single test catches both the overflow and the underflow range. */
+    if (0xFD <= (Bit16u) zExp) {
+        /* Overflow: exponent above 0xFD, or exactly 0xFD with the rounding
+           addition carrying into bit 31 of the significand. */
+        if ((0xFD < zExp)
+             || ((zExp == 0xFD)
+                  && ((Bit32s) (zSig + roundIncrement) < 0)))
+        {
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            /* Exponent 0xFF with zero fraction, minus one when nothing is
+               added by rounding (roundIncrement == 0). */
+            return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);
+        }
+        if (zExp < 0) {
+            /* Tininess is decided before the significand is denormalized,
+               either unconditionally (detection before rounding) or from the
+               exponent and the would-be rounded significand. */
+            int isTiny =
+                   (status.float_detect_tininess == float_tininess_before_rounding)
+                || (zExp < -1)
+                || (zSig + roundIncrement < 0x80000000);
+            /* Denormalize: shift right by -zExp keeping a sticky bit, then
+               recompute the bits that will be rounded away. */
+            shift32RightJamming(zSig, -zExp, &zSig);
+            zExp = 0;
+            roundBits = zSig & roundMask;
+            if (isTiny && roundBits) {
+                float_raise(status, float_flag_underflow);
+                /* With flush-to-zero enabled in `status', return a signed
+                   zero and flag the result as inexact. */
+                if(get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_inexact);
+                    return packFloat32(zSign, 0, 0);
+                }
+            }
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    /* Apply the rounding increment and drop the 7 extra bits. */
+    zSig = ((zSig + roundIncrement) & ~roundMask) >> 7;
+    /* On an exact tie in nearest-even mode, clear bit 0 to land on even. */
+    zSig &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);
+    /* A significand that rounded to zero is packed with a zero exponent. */
+    if (zSig == 0) zExp = 0;
+    return packFloat32(zSign, zExp, zSig);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status)
+{
+    // Distance needed to move the leading 1 of zSig up to bit 30.
+    const int leftShift = countLeadingZeros32(zSig) - 1;
+
+    // The left shift of the significand is balanced by lowering the
+    // exponent by the same amount.
+    const Bit32u alignedSig = zSig << leftShift;
+    const int adjustedExp = zExp - leftShift;
+    return roundAndPackFloat32(zSign, adjustedExp, alignedSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr)
+{
+    // Shift that places the leading 1 of aSig at bit 52 (63 - 11).
+    const int leftShift = countLeadingZeros64(aSig) - 11;
+
+    *zExpPtr = 1 - leftShift;
+    *zSigPtr = aSig << leftShift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
+{
+    // The low 10 bits of zSig lie below the result's least significant bit.
+    const Bit16s guardMask = 0x3FF;
+    const Bit16s halfUlp = 0x200;
+
+    const int mode = get_float_rounding_mode(status);
+    const int toNearestEven = (mode == float_round_nearest_even);
+
+    // Amount added to zSig before the guard bits are dropped: half an ulp
+    // for round-to-nearest, nothing when the mode truncates for this sign,
+    // and just under one ulp otherwise.
+    Bit16s bump = guardMask;
+    if (toNearestEven) {
+        bump = halfUlp;
+    }
+    else if (mode == float_round_to_zero) {
+        bump = 0;
+    }
+    else if (zSign ? (mode == float_round_up) : (mode == float_round_down)) {
+        bump = 0;
+    }
+
+    Bit16s guardBits = zSig & guardMask;
+
+    // The unsigned cast lets one comparison catch both ends of the exponent
+    // range: zExp >= 0x7FD as well as any negative zExp.
+    if ((Bit16u) zExp >= 0x7FD) {
+        const bool carryIntoTop =
+            (zExp == 0x7FD) && ((Bit64s) (zSig + bump) < 0);
+        if ((zExp > 0x7FD) || carryIntoTop) {
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            // Exponent 0x7FF with a zero fraction, or the encoding just
+            // below it when nothing is added for rounding.
+            return packFloat64(zSign, 0x7FF, 0) - (bump == 0);
+        }
+        if (zExp < 0) {
+            const int tiny =
+                   (status.float_detect_tininess == float_tininess_before_rounding)
+                || (zExp < -1)
+                || (zSig + bump < BX_CONST64(0x8000000000000000));
+            shift64RightJamming(zSig, -zExp, &zSig);
+            zExp = 0;
+            // Guard bits are re-read after the denormalizing shift.
+            guardBits = zSig & guardMask;
+            if (tiny && guardBits) {
+                float_raise(status, float_flag_underflow);
+                if (get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_inexact);
+                    return packFloat64(zSign, 0, 0);
+                }
+            }
+        }
+    }
+
+    if (guardBits) float_raise(status, float_flag_inexact);
+    zSig = (zSig + bump) >> 10;
+    // Exact tie under round-to-nearest: clear the LSB to land on even.
+    if (toNearestEven && (guardBits == halfUlp)) zSig &= ~(Bit64u) 1;
+    if (zSig == 0) zExp = 0;
+    return packFloat64(zSign, zExp, zSig);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status)
+{
+    // Distance needed to move the leading 1 of zSig up to bit 62.
+    const int leftShift = countLeadingZeros64(zSig) - 1;
+
+    // The left shift of the significand is balanced by lowering the
+    // exponent by the same amount.
+    const Bit64u alignedSig = zSig << leftShift;
+    const int adjustedExp = zExp - leftShift;
+    return roundAndPackFloat64(zSign, adjustedExp, alignedSig, status);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr)
+{
+    // Shift that places the leading 1 of aSig at bit 63.
+    const int leftShift = countLeadingZeros64(aSig);
+
+    *zExpPtr = 1 - leftShift;
+    *zSigPtr = aSig << leftShift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 roundAndPackFloatx80(int roundingPrecision,  // 32, 64, or any other value for full precision
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
+{
+    Bit64u roundIncrement, roundMask, roundBits;
+    int increment;  // full-precision path only: whether zSig0 is bumped by one
+
+    Bit8u roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    if (roundingPrecision == 64) {  // the low 11 bits of zSig0 are rounded away
+        roundIncrement = BX_CONST64(0x0000000000000400);
+        roundMask = BX_CONST64(0x00000000000007FF);
+    }
+    else if (roundingPrecision == 32) {  // the low 40 bits of zSig0 are rounded away
+        roundIncrement = BX_CONST64(0x0000008000000000);
+        roundMask = BX_CONST64(0x000000FFFFFFFFFF);
+    }
+    else goto precision80;  // full 64-bit significand; zSig1 supplies the rounding bits
+
+    zSig0 |= (zSig1 != 0);  // fold zSig1 into a sticky bit in the LSB of zSig0
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask;
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    if (0x7FFD <= (Bit32u) (zExp - 1)) {  // unsigned compare: true for zExp <= 0 or zExp >= 0x7FFE
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))  // sum wrapped: rounding carries out of bit 63
+        {
+            goto overflow;  // jumps into the full-precision overflow handler, keeping this path's roundMask
+        }
+        if (zExp <= 0) {
+            int isTiny =
+                   (status.float_detect_tininess == float_tininess_before_rounding)
+                || (zExp < 0)
+                || (zSig0 <= zSig0 + roundIncrement);  // sum did not wrap: no carry out of bit 63
+            shift64RightJamming(zSig0, 1 - zExp, &zSig0);
+            zExp = 0;
+            roundBits = zSig0 & roundMask;  // re-read after the denormalizing shift
+            if (isTiny && roundBits) float_raise(status, float_flag_underflow);
+            if (roundBits) float_raise(status, float_flag_inexact);
+            zSig0 += roundIncrement;
+            if ((Bit64s) zSig0 < 0) zExp = 1;  // rounding set bit 63, so the exponent becomes 1
+            roundIncrement = roundMask + 1;  // from here on: one unit in the last kept place
+            if (roundNearestEven && (roundBits<<1 == roundIncrement))  // exact tie
+                roundMask |= roundIncrement;  // also clear the last kept bit (round to even)
+            zSig0 &= ~roundMask;
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    zSig0 += roundIncrement;
+    if (zSig0 < roundIncrement) {  // sum wrapped: carry out of bit 63
+        ++zExp;
+        zSig0 = BX_CONST64(0x8000000000000000);
+    }
+    roundIncrement = roundMask + 1;  // from here on: one unit in the last kept place
+    if (roundNearestEven && (roundBits<<1 == roundIncrement))  // exact tie
+        roundMask |= roundIncrement;  // also clear the last kept bit (round to even)
+    zSig0 &= ~roundMask;
+    if (zSig0 == 0) zExp = 0;
+    return packFloatx80(zSign, zExp, zSig0);
+ precision80:
+    increment = ((Bit64s) zSig1 < 0);  // nearest-even default: top bit of zSig1 is set
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) increment = 0;
+        else {
+            if (zSign) {
+                increment = (roundingMode == float_round_down) && zSig1;
+            }
+            else {
+                increment = (roundingMode == float_round_up) && zSig1;
+            }
+        }
+    }
+    if (0x7FFD <= (Bit32u) (zExp - 1)) {  // unsigned compare: true for zExp <= 0 or zExp >= 0x7FFE
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE)
+                  && (zSig0 == BX_CONST64(0xFFFFFFFFFFFFFFFF))
+                  && increment))  // incrementing an all-ones significand would carry out
+        {
+            roundMask = 0;  // makes ~roundMask below an all-ones significand
+ overflow:  // also entered by goto from the reduced-precision path above
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            if ((roundingMode == float_round_to_zero)
+                 || (zSign && (roundingMode == float_round_up))
+                 || (! zSign && (roundingMode == float_round_down)))  // mode rounds toward zero for this sign
+            {
+                return packFloatx80(zSign, 0x7FFE, ~roundMask);  // exponent 0x7FFE, all kept significand bits set
+            }
+
+            return packFloatx80(zSign, 0x7FFF, BX_CONST64(0x8000000000000000));  // exponent 0x7FFF, only bit 63 set
+        }
+        if (zExp <= 0) {
+            int isTiny =
+                   (status.float_detect_tininess == float_tininess_before_rounding)
+                || (zExp < 0)
+                || ! increment
+                || (zSig0 < BX_CONST64(0xFFFFFFFFFFFFFFFF));
+            shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
+            zExp = 0;
+            if (isTiny && zSig1) float_raise(status, float_flag_underflow);
+            if (zSig1) float_raise(status, float_flag_inexact);
+            if (roundNearestEven)  // increment is recomputed from the shifted zSig1
+                increment = ((Bit64s) zSig1 < 0);
+            else {
+                if (zSign) {
+                    increment = (roundingMode == float_round_down) && zSig1;
+                }
+                else {
+                    increment = (roundingMode == float_round_up) && zSig1;
+                }
+            }
+            if (increment) {
+                ++zSig0;
+                zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);  // tie under nearest-even: clear the LSB
+                if ((Bit64s) zSig0 < 0) zExp = 1;  // rounding set bit 63, so the exponent becomes 1
+            }
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (zSig1) float_raise(status, float_flag_inexact);
+    if (increment) {
+        ++zSig0;
+        if (zSig0 == 0) {  // increment wrapped: carry out of bit 63
+            ++zExp;
+            zSig0 = BX_CONST64(0x8000000000000000);
+        }
+        else {
+            zSig0 &= ~(((Bit64u) (zSig1<<1) == 0) & roundNearestEven);  // tie under nearest-even: clear the LSB
+        }
+    }
+    else {
+        if (zSig0 == 0) zExp = 0;
+    }
+    return packFloatx80(zSign, zExp, zSig0);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status)
+{
+    Bit64u sigHigh = zSig0;
+    Bit64u sigLow = zSig1;
+    Bit32s exponent = zExp;
+
+    // An empty upper word means the whole significand can be moved up by
+    // one full word, with the exponent lowered by 64 to compensate.
+    if (sigHigh == 0) {
+        sigHigh = sigLow;
+        sigLow = 0;
+        exponent -= 64;
+    }
+
+    // Shift the 128-bit significand left until bit 63 of the upper word is
+    // set, again compensating in the exponent.
+    const int leftShift = countLeadingZeros64(sigHigh);
+    shortShift128Left(sigHigh, sigLow, leftShift, &sigHigh, &sigLow);
+    exponent -= leftShift;
+
+    return roundAndPackFloatx80(
+        roundingPrecision, zSign, exponent, sigHigh, sigLow, status);
+}
+
+#endif
--- a/bochs/fpu/softfloat-round-pack.h
+++ b/bochs/fpu/softfloat-round-pack.h
@ -0,0 +1,201 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit32s roundAndPackInt32(int zSign, Bit64u absZ, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit64s roundAndPackInt64(int zSign, Bit64u absZ0, Bit64u absZ1, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(Bit32u aSig, Bit16s *zExpPtr, Bit32u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, Bit16s zExp, Bit32u zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(Bit64u aSig, Bit16s *zExpPtr, Bit64u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, Bit16s zExp, Bit64u zSig, float_status_t &status);
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(Bit64u aSig, Bit32s *zExpPtr, Bit64u *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 roundAndPackFloatx80(int roundingPrecision, 
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision, 
+        int zSign, Bit32s zExp, Bit64u zSig0, Bit64u zSig1, float_status_t &status);
+
+#endif
--- a/bochs/fpu/softfloat-specialize.h
+++ b/bochs/fpu/softfloat-specialize.h
@ -0,0 +1,532 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef SOFTFLOAT_SPECIALIZE_H
+#define SOFTFLOAT_SPECIALIZE_H
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#define int16_indefinite 0x8000 /* only bit 15 set; NOTE(review): with a 32-bit int this literal is the positive int 32768, not a negative Bit16s -- confirm callers expect that */
+#define int32_indefinite 0x80000000 /* only bit 31 set; NOTE(review): presumably the "integer indefinite" result of an invalid conversion -- confirm */
+#define int64_indefinite BX_CONST64(0x8000000000000000) /* only bit 63 set */
+
+/*----------------------------------------------------------------------------
+| Format-independent NaN carrier used when a NaN is converted between
+| floating-point formats.  `sign' holds the sign bit; `hi' and `lo' hold the
+| significand payload.  The xxxToCommonNaN routines in this file left-justify
+| the payload in `hi' and always store zero in `lo'.
+*----------------------------------------------------------------------------*/
+
+typedef struct {
+    int sign;
+    Bit64u hi;
+    Bit64u lo;
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+| 0xFFC00000 has the sign bit set, all eight exponent bits set, and only the
+| most-significant fraction bit set.  The commented-out definition below is an
+| alternative pattern that is not compiled.
+*----------------------------------------------------------------------------*/
+#define float32_default_nan 0xFFC00000
+/*
+#define float32_default_nan 0x7FFFFFFF
+*/
+
+#define float32_fraction extractFloat32Frac /* short alias for the accessor defined below */
+#define float32_exp extractFloat32Exp /* short alias for the accessor defined below */
+#define float32_sign extractFloat32Sign /* short alias for the accessor defined below */
+
+/*----------------------------------------------------------------------------
+| Extracts the 23-bit fraction field (bits 22..0) of the single-precision
+| value `a'.  The sign and exponent bits are cleared in the result.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a)
+{
+    const Bit32u fractionMask = 0x007FFFFF;
+    return a & fractionMask;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the 8-bit exponent field (bits 30..23) of the single-precision
+| value `a', exactly as stored in the encoding.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a)
+{
+    const Bit32u expField = (a >> 23) & 0xFF;
+    return (Bit16s) expField;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 31) of the single-precision value `a'; the
+| result is 0 or 1.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat32Sign(float32 a)
+{
+    const Bit32u signBit = a >> 31;
+    return (int) signBit;
+}
+
+/*----------------------------------------------------------------------------
+| Assembles a single-precision value from sign `zSign', exponent `zExp', and
+| significand `zSig'.  Each field is shifted to its position (sign to bit 31,
+| exponent to bit 23) and the three are then ADDED, not OR-ed.  Any integer
+| portion of `zSig' therefore carries into the exponent field: a complete,
+| normalized significand has an integer portion of 1, so `zExp' must then be
+| 1 less than the desired result exponent.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 packFloat32(int zSign, Bit16s zExp, Bit32u zSig)
+{
+    const Bit32u signField = ((Bit32u) zSign) << 31;
+    const Bit32u expField  = ((Bit32u) zExp) << 23;
+    return signField + expField + zSig;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision value `a' is a NaN (quiet or signaling),
+| otherwise 0.  Shifting left by one discards the sign bit; what remains is a
+| NaN exactly when it exceeds 0xFF000000, i.e. exponent all ones with a
+| nonzero fraction.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float32_is_nan(float32 a)
+{
+    const Bit32u withoutSign = (Bit32u) (a << 1);
+    return withoutSign > 0xFF000000;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision value `a' is a signaling NaN, otherwise
+| 0.  That requires bits 30..23 all set, bit 22 (the top fraction bit) clear,
+| and at least one of the low 22 fraction bits set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float32_is_signaling_nan(float32 a)
+{
+    /* exponent all ones with the top fraction bit clear */
+    if (((a >> 22) & 0x1FF) != 0x1FE) return 0;
+    /* a zero remainder would be an infinity, not a NaN */
+    return (a & 0x003FFFFF) != 0;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the single-precision NaN `a' to the canonical NaN format.  The
+| invalid exception is raised in `status' when `a' is a signaling NaN.  The
+| input is shifted left by 41 so that its top fraction bit lands in bit 63 of
+| `hi'; `lo' is set to zero.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float32ToCommonNaN(float32 a, float_status_t &status)
+{
+    if (float32_is_signaling_nan(a)) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    commonNaNT result;
+    result.sign = a >> 31;
+    result.hi   = ((Bit64u) a) << 41;
+    result.lo   = 0;
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to single precision.  The result combines
+| the sign bit, the constant 0x7FC00000 (exponent all ones plus the top
+| fraction bit), and the upper bits of `a.hi' shifted down by 41.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 commonNaNToFloat32(commonNaNT a)
+{
+    const Bit32u signField = ((Bit32u) a.sign) << 31;
+    const Bit64u payload   = a.hi >> 41;
+    return signField | 0x7FC00000 | payload;
+}
+
+/*----------------------------------------------------------------------------
+| Single-operand NaN propagation.  Takes the single-precision NaN `a', raises
+| the invalid exception in `status' if it is signaling, and returns `a' with
+| bit 22 (the top fraction bit) forced on.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 propagateFloat32NaN(float32 a, float_status_t &status)
+{
+    const int signaling = float32_is_signaling_nan(a);
+    if (signaling) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    const float32 quieted = a | 0x00400000;
+    return quieted;
+}
+
+/*----------------------------------------------------------------------------
+| Two-operand NaN propagation for single precision.  One of `a' and `b' is a
+| NaN.  The invalid exception is raised in `status' if either is signaling.
+| Both operands get bit 22 forced on before one of them is returned.
+|     When the NaN handling mode is not float_larger_significand_nan, `a' is
+| returned if it is a NaN, otherwise `b'.
+|     In float_larger_significand_nan mode:
+|       - only `a' signaling: `b' if `b' is a NaN, otherwise `a';
+|       - `a' quiet and `b' signaling or not a NaN: `a';
+|       - `a' not a NaN: `b';
+|       - both signaling or both quiet: the operand that is larger once the
+|         sign bit is shifted out; on a tie, the smaller raw bit pattern.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status)
+{
+    /* Classify both operands before their quiet bits are forced on. */
+    const int aNaN  = float32_is_nan(a);
+    const int aSNaN = float32_is_signaling_nan(a);
+    const int bNaN  = float32_is_nan(b);
+    const int bSNaN = float32_is_signaling_nan(b);
+
+    a |= 0x00400000;
+    b |= 0x00400000;
+
+    if (aSNaN || bSNaN) float_raise(status, float_flag_invalid);
+
+    if (get_float_nan_handling_mode(status) != float_larger_significand_nan) {
+        return (aSNaN || aNaN) ? a : b;
+    }
+
+    if (aSNaN) {
+        if (! bSNaN) return bNaN ? b : a;
+    }
+    else if (aNaN) {
+        if (bSNaN || ! bNaN) return a;
+    }
+    else {
+        return b;
+    }
+
+    /* Reached only when both are signaling or both are quiet NaNs. */
+    const Bit32u aMagnitude = (Bit32u) (a<<1);
+    const Bit32u bMagnitude = (Bit32u) (b<<1);
+    if (aMagnitude < bMagnitude) return b;
+    if (bMagnitude < aMagnitude) return a;
+    return (a < b) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+| 0xFFF8000000000000 has the sign bit set, all eleven exponent bits set, and
+| only the most-significant fraction bit set.  The commented-out definition
+| below is an alternative pattern that is not compiled.
+*----------------------------------------------------------------------------*/
+#define float64_default_nan BX_CONST64(0xFFF8000000000000)
+/*
+#define float64_default_nan BX_CONST64(0x7FFFFFFFFFFFFFFF)
+*/
+
+#define float64_fraction extractFloat64Frac /* short alias for the accessor defined below */
+#define float64_exp extractFloat64Exp /* short alias for the accessor defined below */
+#define float64_sign extractFloat64Sign /* short alias for the accessor defined below */
+
+/*----------------------------------------------------------------------------
+| Extracts the 52-bit fraction field (bits 51..0) of the double-precision
+| value `a'.  The sign and exponent bits are cleared in the result.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a)
+{
+    const Bit64u fractionMask = BX_CONST64(0x000FFFFFFFFFFFFF);
+    return a & fractionMask;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the 11-bit exponent field (bits 62..52) of the double-precision
+| value `a', exactly as stored in the encoding.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a)
+{
+    const Bit64u expField = (a >> 52) & 0x7FF;
+    return (Bit16s) expField;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 63) of the double-precision value `a'; the
+| result is 0 or 1.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloat64Sign(float64 a)
+{
+    const Bit64u signBit = a >> 63;
+    return (int) signBit;
+}
+
+/*----------------------------------------------------------------------------
+| Assembles a double-precision value from sign `zSign', exponent `zExp', and
+| significand `zSig'.  Each field is shifted to its position (sign to bit 63,
+| exponent to bit 52) and the three are then ADDED, not OR-ed.  Any integer
+| portion of `zSig' therefore carries into the exponent field: a complete,
+| normalized significand has an integer portion of 1, so `zExp' must then be
+| 1 less than the desired result exponent.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 packFloat64(int zSign, Bit16s zExp, Bit64u zSig)
+{
+    const Bit64u signField = ((Bit64u) zSign) << 63;
+    const Bit64u expField  = ((Bit64u) zExp) << 52;
+    return signField + expField + zSig;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision value `a' is a NaN (quiet or signaling),
+| otherwise 0.  Shifting left by one discards the sign bit; what remains is a
+| NaN exactly when it exceeds 0xFFE0000000000000, i.e. exponent all ones with
+| a nonzero fraction.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float64_is_nan(float64 a)
+{
+    const Bit64u withoutSign = (Bit64u) (a << 1);
+    return withoutSign > BX_CONST64(0xFFE0000000000000);
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision value `a' is a signaling NaN, otherwise
+| 0.  That requires bits 62..52 all set, bit 51 (the top fraction bit) clear,
+| and at least one of the low 51 fraction bits set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int float64_is_signaling_nan(float64 a)
+{
+    /* exponent all ones with the top fraction bit clear */
+    if (((a >> 51) & 0xFFF) != 0xFFE) return 0;
+    /* a zero remainder would be an infinity, not a NaN */
+    return (a & BX_CONST64(0x0007FFFFFFFFFFFF)) != 0;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision NaN `a' to the canonical NaN format.  The
+| invalid exception is raised in `status' when `a' is a signaling NaN.  The
+| input is shifted left by 12 so that its top fraction bit lands in bit 63 of
+| `hi'; `lo' is set to zero.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT float64ToCommonNaN(float64 a, float_status_t &status)
+{
+    if (float64_is_signaling_nan(a)) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    commonNaNT result;
+    result.sign = a >> 63;
+    result.hi   = a << 12;
+    result.lo   = 0;
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to double precision.  The result combines
+| the sign bit, the constant 0x7FF8000000000000 (exponent all ones plus the
+| top fraction bit), and `a.hi' shifted down by 12.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 commonNaNToFloat64(commonNaNT a)
+{
+    const Bit64u signField = ((Bit64u) a.sign) << 63;
+    const Bit64u payload   = a.hi >> 12;
+    return signField | BX_CONST64(0x7FF8000000000000) | payload;
+}
+
+/*----------------------------------------------------------------------------
+| Single-operand NaN propagation.  Takes the double-precision NaN `a', raises
+| the invalid exception in `status' if it is signaling, and returns `a' with
+| bit 51 (the top fraction bit) forced on.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 propagateFloat64NaN(float64 a, float_status_t &status)
+{
+    const int signaling = float64_is_signaling_nan(a);
+    if (signaling) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    const float64 quieted = a | BX_CONST64(0x0008000000000000);
+    return quieted;
+}
+
+/*----------------------------------------------------------------------------
+| Two-operand NaN propagation for double precision.  One of `a' and `b' is a
+| NaN.  The invalid exception is raised in `status' if either is signaling.
+| Both operands get bit 51 forced on before one of them is returned.
+|     When the NaN handling mode is not float_larger_significand_nan, `a' is
+| returned if it is a NaN, otherwise `b'.
+|     In float_larger_significand_nan mode:
+|       - only `a' signaling: `b' if `b' is a NaN, otherwise `a';
+|       - `a' quiet and `b' signaling or not a NaN: `a';
+|       - `a' not a NaN: `b';
+|       - both signaling or both quiet: the operand that is larger once the
+|         sign bit is shifted out; on a tie, the smaller raw bit pattern.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 propagateFloat64NaN(float64 a, float64 b, float_status_t &status)
+{
+    /* Classify both operands before their quiet bits are forced on. */
+    const int aNaN  = float64_is_nan(a);
+    const int aSNaN = float64_is_signaling_nan(a);
+    const int bNaN  = float64_is_nan(b);
+    const int bSNaN = float64_is_signaling_nan(b);
+
+    a |= BX_CONST64(0x0008000000000000);
+    b |= BX_CONST64(0x0008000000000000);
+
+    if (aSNaN || bSNaN) float_raise(status, float_flag_invalid);
+
+    if (get_float_nan_handling_mode(status) != float_larger_significand_nan) {
+        return (aSNaN || aNaN) ? a : b;
+    }
+
+    if (aSNaN) {
+        if (! bSNaN) return bNaN ? b : a;
+    }
+    else if (aNaN) {
+        if (bSNaN || ! bNaN) return a;
+    }
+    else {
+        return b;
+    }
+
+    /* Reached only when both are signaling or both are quiet NaNs. */
+    const Bit64u aMagnitude = (Bit64u) (a<<1);
+    const Bit64u bMagnitude = (Bit64u) (b<<1);
+    if (aMagnitude < bMagnitude) return b;
+    if (bMagnitude < aMagnitude) return a;
+    return (a < b) ? a : b;
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+|     Here the two parts are `floatx80_default_nan_exp', a 16-bit word whose
+| value 0xFFFF has bit 15 and all fifteen lower bits set, and
+| `floatx80_default_nan_fraction', a 64-bit value with only bits 63 and 62
+| set.  The commented-out pair below is an alternative that is not compiled.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_exp 0xFFFF
+#define floatx80_default_nan_fraction BX_CONST64(0xC000000000000000)
+/*
+#define floatx80_default_nan_exp 0x7FFF
+#define floatx80_default_nan_fraction BX_CONST64(0xFFFFFFFFFFFFFFFF)
+*/
+
+#define floatx80_fraction extractFloatx80Frac /* short alias for the accessor defined below */
+#define floatx80_exp extractFloatx80Exp /* short alias for the accessor defined below */
+#define floatx80_sign extractFloatx80Sign /* short alias for the accessor defined below */
+
+/*----------------------------------------------------------------------------
+| Returns the entire 64-bit `fraction' member of the extended
+| double-precision value `a', without masking any bit.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloatx80Frac(floatx80 a)
+{
+    const Bit64u significand = a.fraction;
+    return significand;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the 15-bit exponent field of the extended double-precision value
+| `a', i.e. the `exp' word with its sign bit (bit 15) masked off.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32s extractFloatx80Exp(floatx80 a)
+{
+    const Bit32s expField = a.exp & 0x7FFF;
+    return expField;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision value `a', taken
+| from bit 15 of the `exp' word; the result is 0 or 1.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int extractFloatx80Sign(floatx80 a)
+{
+    const int signBit = a.exp >> 15;
+    return signBit;
+}
+
+/*----------------------------------------------------------------------------
+| Assembles an extended double-precision value.  `zSig' becomes the
+| `fraction' member unchanged; the `exp' word is `zSign' shifted to bit 15
+| plus `zExp'.  Because the two are added, a `zExp' that already has bit 15
+| set also affects the stored sign bit.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 packFloatx80(int zSign, Bit32s zExp, Bit64u zSig)
+{
+    floatx80 result;
+    result.exp = (zSign << 15) + zExp;
+    result.fraction = zSig;
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision value `a' is a NaN, otherwise
+| 0.  A NaN has all fifteen exponent bits set and at least one fraction bit
+| set below bit 63 (bit 63 itself is shifted out before the test).
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_nan(floatx80 a)
+{
+    if ((a.exp & 0x7FFF) != 0x7FFF) return 0;
+    return (Bit64u) (a.fraction << 1) != 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+|     A signaling NaN has all fifteen exponent bits set, bit 62 of the
+| fraction (the quiet bit) clear, and at least one fraction bit set below
+| bit 62.  Bit 63 is ignored by the test.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_signaling_nan(floatx80 a)
+{
+    /* Fraction with the quiet bit (bit 62) cleared.  Kept unsigned on purpose:
+       bit 63 of the fraction can be set (it is in
+       floatx80_default_nan_fraction), and left-shifting a negative signed
+       value is undefined behaviour before C++20. */
+    const Bit64u aLow = a.fraction & ~BX_CONST64(0x4000000000000000);
+    return ((a.exp & 0x7FFF) == 0x7FFF) &&
+            ((Bit64u) (aLow<<1) != 0) &&    /* something set below bit 62 */
+            (a.fraction == aLow);           /* quiet bit was already clear */
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' uses
+| an unsupported encoding; otherwise returns 0.  An encoding is reported as
+| unsupported when its exponent field is nonzero while bit 63 of the
+| fraction is clear.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int floatx80_is_unsupported(floatx80 a)
+{
+    const int expNonZero = (a.exp & 0x7FFF) != 0;
+    const int topBitSet  = (a.fraction & BX_CONST64(0x8000000000000000)) != 0;
+    return expNonZero && ! topBitSet;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision NaN `a' to the canonical NaN
+| format.  The invalid exception is raised in `status' when `a' is a
+| signaling NaN.  The fraction is shifted left by one, dropping bit 63, and
+| stored in `hi'; `lo' is set to zero.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE commonNaNT floatx80ToCommonNaN(floatx80 a, float_status_t &status)
+{
+    if (floatx80_is_signaling_nan(a)) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    commonNaNT result;
+    result.sign = a.exp >> 15;
+    result.hi   = a.fraction << 1;
+    result.lo   = 0;
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the canonical NaN `a' to the extended double-precision format.
+| The fraction is `a.hi' shifted right by one with bits 63 and 62 forced on;
+| the `exp' word is the sign in bit 15 with all fifteen exponent bits set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 commonNaNToFloatx80(commonNaNT a)
+{
+    floatx80 result;
+    result.exp = (((Bit16u) a.sign)<<15) | 0x7FFF;
+    result.fraction = BX_CONST64(0xC000000000000000) | (a.hi>>1);
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Single-operand NaN propagation.  Takes the extended double-precision NaN
+| `a', raises the invalid exception in `status' if it is signaling, and
+| returns `a' with bits 63 and 62 of its fraction forced on.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 propagateFloatx80NaN(floatx80 a, float_status_t &status)
+{
+    const int signaling = floatx80_is_signaling_nan(a);
+    if (signaling) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    a.fraction = a.fraction | BX_CONST64(0xC000000000000000);
+    return a;
+}
+
+/*----------------------------------------------------------------------------
+| Two-operand NaN propagation for extended double precision.  One of `a' and
+| `b' is a NaN.  The invalid exception is raised in `status' if either is
+| signaling.  Both operands get bits 63 and 62 of the fraction forced on
+| before one of them is returned.  The NaN handling mode in `status' is not
+| consulted here.  Selection:
+|       - only `a' signaling: `b' if `b' is a NaN, otherwise `a';
+|       - `a' quiet and `b' signaling or not a NaN: `a';
+|       - `a' not a NaN: `b';
+|       - both signaling or both quiet: the operand with the larger fraction;
+|         on a tie, the one with the smaller `exp' word.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status_t &status)
+{
+    /* Classify both operands before their quiet bits are forced on. */
+    const int aNaN  = floatx80_is_nan(a);
+    const int aSNaN = floatx80_is_signaling_nan(a);
+    const int bNaN  = floatx80_is_nan(b);
+    const int bSNaN = floatx80_is_signaling_nan(b);
+
+    a.fraction |= BX_CONST64(0xC000000000000000);
+    b.fraction |= BX_CONST64(0xC000000000000000);
+
+    if (aSNaN || bSNaN) float_raise(status, float_flag_invalid);
+
+    if (aSNaN) {
+        if (! bSNaN) return bNaN ? b : a;
+    }
+    else if (aNaN) {
+        if (bSNaN || ! bNaN) return a;
+    }
+    else {
+        return b;
+    }
+
+    /* Reached only when both are signaling or both are quiet NaNs. */
+    if (a.fraction < b.fraction) return b;
+    if (b.fraction < a.fraction) return a;
+    return (a.exp < b.exp) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+|     Built with packFloatx80 using a sign argument of 0.  The stored sign
+| bit is nevertheless set, because floatx80_default_nan_exp (0xFFFF) already
+| contains bit 15 and packFloatx80 adds the exponent argument into the same
+| 16-bit word.  Being `static' in a header, every translation unit that
+| includes this file gets its own copy.
+*----------------------------------------------------------------------------*/
+static const floatx80 floatx80_default_nan = 
+    packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+#endif	/* FLOATX80 */
+
+#endif
--- a/bochs/fpu/softfloat.cc
+++ b/bochs/fpu/softfloat.cc
--- a/bochs/fpu/softfloat.h
+++ b/bochs/fpu/softfloat.h
@ -0,0 +1,306 @@
+/*============================================================================
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include <config.h>      /* generated by configure script from config.h.in */
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
+#define FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.  Single- and double-precision
+| values are carried as raw 32-bit and 64-bit unsigned integers; each type is
+| available under a lower-case and a capitalized name.
+*----------------------------------------------------------------------------*/
+typedef Bit32u float32;
+typedef Bit32u Float32;
+typedef Bit64u float64;
+typedef Bit64u Float64;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point class.  Result type of the xxx_class
+| routines declared in this header.  The enumerator values are written out
+| explicitly and are the ones the compiler would assign by default.
+*----------------------------------------------------------------------------*/
+typedef enum {
+    float_zero         = 0,
+    float_NaN          = 1,
+    float_negative_inf = 2,
+    float_positive_inf = 3,
+    float_denormal     = 4,
+    float_normalized   = 5
+} float_class_t;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+|     The enum is anonymous, so these are plain integer constants.
+| NOTE(review): presumably the values stored in the `float_detect_tininess'
+| field of float_status_t -- confirm against the users of that field.
+*----------------------------------------------------------------------------*/
+enum {
+    float_tininess_after_rounding  = 0,
+    float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point NaN operands handling mode.
+|     Read through get_float_nan_handling_mode().  The two-operand
+| propagateFloat32NaN/propagateFloat64NaN routines in softfloat-specialize.h
+| compare it against float_larger_significand_nan; for any other value they
+| return the first operand when it is a NaN and the second otherwise.
+*----------------------------------------------------------------------------*/
+enum float_nan_handling_mode_t {
+    float_larger_significand_nan = 0,	// this mode used by x87 FPU
+    float_first_operand_nan = 1		// this mode used by SSE
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+|     NOTE(review): the numbering presumably mirrors the two-bit x86
+| rounding-control encoding (0 nearest, 1 down, 2 up, 3 toward zero) so the
+| field can be copied without translation -- confirm against the callers.
+*----------------------------------------------------------------------------*/
+enum float_round_t {
+    float_round_nearest_even = 0,
+    float_round_down         = 1,
+    float_round_up           = 2,
+    float_round_to_zero      = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+|     Each flag is a distinct single bit (bits 0 through 5), so several can
+| be combined with bitwise OR; float_raise() ORs them into
+| `float_exception_flags'.
+|     NOTE(review): the bit order presumably matches the x87 status word
+| exception bits (IE, DE, ZE, OE, UE, PE) -- confirm.
+*----------------------------------------------------------------------------*/
+enum float_exception_flag_t {
+    float_flag_invalid   = 0x01,
+    float_flag_denormal  = 0x02,
+    float_flag_divbyzero = 0x04,
+    float_flag_overflow  = 0x08,
+    float_flag_underflow = 0x10,
+    float_flag_inexact   = 0x20
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+|     The enum is anonymous, so these are plain integer constants.
+| NOTE(review): presumably the values returned by the xxx_compare and
+| xxx_compare_quiet routines, which return int -- confirm.
+*----------------------------------------------------------------------------*/
+enum {
+    float_relation_less      = -1,
+    float_relation_equal     =  0,
+    float_relation_greater   =  1,
+    float_relation_unordered =  2
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point status structure.
+|     Passed by reference to the arithmetic routines.  It carries the
+| control settings they read together with the exception flags they
+| accumulate.  `softfloat_status_word_t' is an alias for the same type.
+*----------------------------------------------------------------------------*/
+struct float_status_t 
+{
+#ifdef FLOATX80
+    int float_rounding_precision;	/* floatx80 only */
+#endif
+    int float_detect_tininess;      /* NOTE(review): presumably a float_tininess_* constant -- confirm */
+    int float_rounding_mode;        /* returned by get_float_rounding_mode() */
+    int float_exception_flags;      /* OR-accumulated by float_raise() */
+    int float_nan_handling_mode;    /* returned by get_float_nan_handling_mode() */
+    int flush_underflow_to_zero;	/* flag register */
+};
+typedef struct float_status_t softfloat_status_word_t;
+
+/*----------------------------------------------------------------------------
+| Records one or more floating-point exceptions.  The bits in `flags' are
+| OR-ed into `status.float_exception_flags'; flags that are already set stay
+| set.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE void float_raise(float_status_t &status, int flags)
+{
+    status.float_exception_flags = status.float_exception_flags | flags;
+}
+
+/*----------------------------------------------------------------------------
+| Accessor for the rounding mode held in `status'; returns the
+| `float_rounding_mode' field unchanged.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_float_rounding_mode(float_status_t &status)
+{
+    const int roundingMode = status.float_rounding_mode;
+    return roundingMode;
+}
+
+/*----------------------------------------------------------------------------
+| Accessor for the rounding precision held in `status'; returns the
+| `float_rounding_precision' field unchanged.  Only compiled when FLOATX80
+| is defined.
+*----------------------------------------------------------------------------*/
+
+#ifdef FLOATX80
+BX_CPP_INLINE int get_float_rounding_precision(float_status_t &status)
+{
+    const int roundingPrecision = status.float_rounding_precision;
+    return roundingPrecision;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Accessor for the NaN operands handling mode held in `status'; returns the
+| `float_nan_handling_mode' field unchanged.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_float_nan_handling_mode(float_status_t &status)
+{
+    const int nanMode = status.float_nan_handling_mode;
+    return nanMode;
+}
+
+/*----------------------------------------------------------------------------
+| Accessor for the <flush-underflow-to-zero> setting held in `status';
+| returns the `flush_underflow_to_zero' field unchanged.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE int get_flush_underflow_to_zero(float_status_t &status)
+{
+    const int flushToZero = status.flush_underflow_to_zero;
+    return flushToZero;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+|     Prototypes only.  Every routine except `int32_to_float64' takes a
+| `float_status_t' reference as its last argument.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32(Bit32s, float_status_t &status);
+float64 int32_to_float64(Bit32s); /* NOTE(review): no status argument -- presumably because the conversion is always exact; confirm */
+float32 int64_to_float32(Bit64s, float_status_t &status);
+float64 int64_to_float64(Bit64s, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+|     Prototypes only.  Each takes the float32 operand followed by a
+| `float_status_t' reference.
+|     NOTE(review): the `_round_to_zero' variants presumably truncate
+| regardless of the rounding mode in `status' -- confirm.
+*----------------------------------------------------------------------------*/
+Bit32s float32_to_int32(float32, float_status_t &status);
+Bit32s float32_to_int32_round_to_zero(float32, float_status_t &status);
+Bit64s float32_to_int64(float32, float_status_t &status);
+Bit64s float32_to_int64_round_to_zero(float32, float_status_t &status);
+float64 float32_to_float64(float32, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+|     Prototypes only.  The arithmetic routines take their float32 operands
+| followed by a `float_status_t' reference and return a float32.
+|     `float32_compare_method' is a pointer-to-function type with the same
+| signature as the comparison routines declared after it (two float32
+| operands plus `status', returning int).
+|     `float32_class' and `float32_is_signaling_nan' take no status argument.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int(float32, float_status_t &status);
+float32 float32_add(float32, float32, float_status_t &status);
+float32 float32_sub(float32, float32, float_status_t &status);
+float32 float32_mul(float32, float32, float_status_t &status);
+float32 float32_div(float32, float32, float_status_t &status);
+float32 float32_sqrt(float32, float_status_t &status);
+
+typedef int (*float32_compare_method)(float32, float32, float_status_t &status);
+int float32_eq(float32, float32, float_status_t &status);
+int float32_le(float32, float32, float_status_t &status);
+int float32_lt(float32, float32, float_status_t &status);
+int float32_eq_signaling(float32, float32, float_status_t &status);
+int float32_le_quiet(float32, float32, float_status_t &status);
+int float32_lt_quiet(float32, float32, float_status_t &status);
+int float32_unordered(float32, float32, float_status_t &status);
+int float32_compare(float32, float32, float_status_t &status);
+int float32_compare_quiet(float32, float32, float_status_t &status);
+
+float_class_t float32_class(float32);
+int float32_is_signaling_nan(float32);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+|     Prototypes only.  Each takes the float64 operand followed by a
+| `float_status_t' reference.
+|     NOTE(review): the `_round_to_zero' variants presumably truncate
+| regardless of the rounding mode in `status' -- confirm.
+*----------------------------------------------------------------------------*/
+Bit32s float64_to_int32(float64, float_status_t &status);
+Bit32s float64_to_int32_round_to_zero(float64, float_status_t &status);
+Bit64s float64_to_int64(float64, float_status_t &status);
+Bit64s float64_to_int64_round_to_zero(float64, float_status_t &status);
+float32 float64_to_float32(float64, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+|     Prototypes only.  The arithmetic routines take their float64 operands
+| followed by a `float_status_t' reference and return a float64.
+|     `float64_compare_method' is a pointer-to-function type with the same
+| signature as the comparison routines declared after it (two float64
+| operands plus `status', returning int).
+|     `float64_class' and `float64_is_signaling_nan' take no status argument.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int(float64, float_status_t &status);
+float64 float64_add(float64, float64, float_status_t &status);
+float64 float64_sub(float64, float64, float_status_t &status);
+float64 float64_mul(float64, float64, float_status_t &status);
+float64 float64_div(float64, float64, float_status_t &status);
+float64 float64_sqrt(float64, float_status_t &status);
+
+typedef int (*float64_compare_method)(float64, float64, float_status_t &status);
+int float64_eq(float64, float64, float_status_t &status);
+int float64_le(float64, float64, float_status_t &status);
+int float64_lt(float64, float64, float_status_t &status);
+int float64_eq_signaling(float64, float64, float_status_t &status);
+int float64_le_quiet(float64, float64, float_status_t &status);
+int float64_lt_quiet(float64, float64, float_status_t &status);
+int float64_unordered(float64, float64, float_status_t &status);
+int float64_compare(float64, float64, float_status_t &status);
+int float64_compare_quiet(float64, float64, float_status_t &status);
+
+float_class_t float64_class(float64);
+int float64_is_signaling_nan(float64);
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+|
+| floatx80 is the extended double-precision value: a 64-bit significand
+| (`fraction') plus a 16-bit word (`exp') holding the sign in bit 15 and the
+| exponent in bits 14..0.  The member order is swapped when BX_BIG_ENDIAN is
+| defined; both variants are declared packed with 1-byte alignment.
+*----------------------------------------------------------------------------*/
+#ifdef BX_BIG_ENDIAN
+struct floatx80 {	// do not allow 16-byte extension of the structure
+    Bit16u exp;       // bit 15: sign, bits 14..0: exponent
+    Bit64u fraction;  // 64-bit significand
+} GCC_ATTRIBUTE((aligned(1), packed));
+#else
+struct floatx80 {	// do not allow 16-byte extension of the structure
+    Bit64u fraction;  // 64-bit significand
+    Bit16u exp;       // bit 15: sign, bits 14..0: exponent
+} GCC_ATTRIBUTE((aligned(1), packed));
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+|
+| These two conversions take no status argument.
+*----------------------------------------------------------------------------*/
+floatx80 int32_to_floatx80(Bit32s);
+floatx80 int64_to_floatx80(Bit64s);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+floatx80 float32_to_floatx80(float32, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+floatx80 float64_to_floatx80(float64, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+|
+| floatx80_to_int32 and floatx80_to_int32_round_to_zero are the routines the
+| 16-bit conversions in softfloatx80.cc delegate to before range-checking.
+*----------------------------------------------------------------------------*/
+Bit32s floatx80_to_int32(floatx80, float_status_t &status);
+Bit32s floatx80_to_int32_round_to_zero(floatx80, float_status_t &status);
+Bit64s floatx80_to_int64(floatx80, float_status_t &status);
+Bit64s floatx80_to_int64_round_to_zero(floatx80, float_status_t &status);
+
+float32 floatx80_to_float32(floatx80, float_status_t &status);
+float64 floatx80_to_float64(floatx80, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+|
+| Operands are passed by value and the status object by non-const reference.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int(floatx80, float_status_t &status);
+floatx80 floatx80_add(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_sub(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_mul(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_div(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_sqrt(floatx80, float_status_t &status);
+
+int floatx80_is_signaling_nan(floatx80);  /* used by floatx80_compare_quiet to decide whether a NaN operand raises invalid */
+
+#endif  /* FLOATX80 */
+
+#endif
--- a/bochs/fpu/softfloatx80.cc
+++ b/bochs/fpu/softfloatx80.cc
@ -0,0 +1,347 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#include "softfloatx80.h"
+#include "softfloat-round-pack.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| 16-bit two's complement integer format.  The value is first converted with
+| floatx80_to_int32 (which rounds according to the current rounding mode) and
+| the 32-bit result is then checked against the 16-bit range.  If `a' is an
+| unsupported encoding, a NaN, or the conversion overflows, the invalid flag
+| is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16(floatx80 a, float_status_t &status)
+{
+   if (! floatx80_is_unsupported(a))
+   {
+        const Bit32s wide = floatx80_to_int32(a, status);
+
+        /* result fits into 16 bits: the narrowing cast is lossless */
+        if ((wide >= (Bit32s) BX_MIN_BIT16S) && (wide <= (Bit32s) BX_MAX_BIT16S))
+            return (Bit16s) wide;
+   }
+
+   /* unsupported operand or out-of-range result */
+   float_raise(status, float_flag_invalid);
+   return int16_indefinite;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision floating-point value `a' to the
+| 16-bit two's complement integer format, always rounding toward zero.  The
+| value is first converted with floatx80_to_int32_round_to_zero and the
+| 32-bit result is then checked against the 16-bit range.  If `a' is an
+| unsupported encoding, a NaN, or the conversion overflows, the invalid flag
+| is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16_round_to_zero(floatx80 a, float_status_t &status)
+{
+   if (! floatx80_is_unsupported(a))
+   {
+        const Bit32s wide = floatx80_to_int32_round_to_zero(a, status);
+
+        /* result fits into 16 bits: the narrowing cast is lossless */
+        if ((wide >= (Bit32s) BX_MIN_BIT16S) && (wide <= (Bit32s) BX_MAX_BIT16S))
+            return (Bit16s) wide;
+   }
+
+   /* unsupported operand or out-of-range result */
+   float_raise(status, float_flag_invalid);
+   return int16_indefinite;
+}
+
+/*----------------------------------------------------------------------------
+| Separate the source extended double-precision floating point value `a'
+| into its exponent and significand, store the significand back to `a'
+| and return the exponent. The operation performed is a superset of
+| the IEC/IEEE recommended logb(x) function.
+|
+| Parameters:
+|   a      - in/out: source operand; on return holds the significand
+|            (sign of the source, exponent field 0x3FFF) or a special value
+|   status - in/out: receives the raised exception flags
+|
+| Special cases:
+|  - unsupported encoding: invalid is raised, `a' and the result are the
+|    default NaN.
+|  - NaN: `a' and the result are the propagated NaN; invalid is raised only
+|    for a signaling NaN (a quiet NaN is not an invalid operand).
+|  - infinity: `a' is left unchanged and +infinity is returned.
+|  - zero: divide-by-zero is raised, `a' becomes a zero with the source sign
+|    and -infinity is returned.
+|  - denormal: the denormal flag is raised and the operand is normalized
+|    before being split.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_extract(floatx80 &a, float_status_t &status)
+{
+    Bit64u aSig = extractFloatx80Frac(a);
+    Bit32s aExp = extractFloatx80Exp(a);
+    int   aSign = extractFloatx80Sign(a);
+
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        a = floatx80_default_nan;
+        return a;
+    }
+
+    if (aExp == 0x7FFF) {
+        if ((Bit64u) (aSig<<1))
+        {
+            /* test before propagation: only a signaling NaN is invalid */
+            if (floatx80_is_signaling_nan(a))
+                float_raise(status, float_flag_invalid);
+
+            a = propagateFloatx80NaN(a, status);
+            return a;
+        }
+        /* infinity: significand keeps the source, exponent is +infinity */
+        return packFloatx80(0, 0x7FFF, BX_CONST64(0x8000000000000000));
+    }
+    if (aExp == 0)
+    {
+        if (aSig == 0) {
+            /* zero: significand is a signed zero, exponent is -infinity */
+            float_raise(status, float_flag_divbyzero);
+            a = packFloatx80(aSign, 0, 0);
+            return packFloatx80(1, 0x7FFF, BX_CONST64(0x8000000000000000));
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+
+    /* significand: same sign and fraction, exponent field set to 0x3FFF */
+    a.exp = (aSign << 15) + 0x3FFF;
+    a.fraction = aSig;
+    /* exponent: the exponent field minus 0x3FFF, as a floatx80 integer */
+    return int32_to_floatx80(aExp - 0x3FFF);
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by 
+| value `b'. The function truncates the value in the second operand 'b' to 
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation performed according to the IEC/IEEE Standard for Binary 
+| Floating-Point Arithmetic.
+|
+| Special cases, in the order they are tested:
+|  - `a' or `b' is an unsupported encoding: invalid is raised and the default
+|    NaN is returned.
+|  - `a' or `b' is a NaN: the result comes from propagateFloatx80NaN(a, b).
+|  - `a' is an infinity: invalid/default NaN if `b' is -infinity, otherwise
+|    `a' is returned (denormal is raised when `b' is a denormal).
+|  - `b' is an infinity: invalid/default NaN if `a' is zero and `b' is
+|    +infinity; `a' unchanged if `a' is zero and `b' is -infinity; otherwise
+|    a zero (b = -infinity) or an infinity (b = +infinity) carrying the sign
+|    of `a' (denormal is raised when `a' is a denormal).
+|  - `a' is zero, `b' is zero, or the exponent field of `b' is below 0x3FFF
+|    (so `b' truncates to zero): `a' is returned unchanged.
+|  - the exponent field of `b' is above 0x400E: an out-of-range exponent is
+|    handed to roundAndPackFloatx80 to generate overflow or underflow.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status)
+{
+    Bit32s aExp, bExp;
+    Bit64u aSig, bSig;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    if (aExp == 0x7FFF) {  /* a is an infinity or a NaN */
+        if ((Bit64u) (aSig<<1) || ((bExp == 0x7FFF) && (Bit64u) (bSig<<1))) 
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if ((bExp == 0x7FFF) && bSign) {  /* infinity scaled by -infinity */
+            float_raise(status, float_flag_invalid);
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);  /* b is non-zero with a zero exponent field */
+        return a;
+    }
+    if (bExp == 0x7FFF) {  /* b is an infinity or a NaN, a is finite */
+        if ((Bit64u) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if ((aExp | aSig) == 0) {  /* a is zero */
+            if (! bSign) {  /* zero scaled by +infinity */
+                float_raise(status, float_flag_invalid);
+                return floatx80_default_nan;
+            }
+            return a;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);  /* a is non-zero with a zero exponent field */
+        if (bSign) return packFloatx80(aSign, 0, 0);  /* scaled by -infinity: signed zero */
+        return packFloatx80(aSign, 0x7FFF, BX_CONST64(0x8000000000000000));  /* scaled by +infinity: signed infinity */
+    }
+    if (aExp == 0) {
+        if (aSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+
+    if (bExp > 0x400E) {   /* exponent field of b is at least 0x3FFF + 16 */
+        /* generate appropriate overflow/underflow */
+        return roundAndPackFloatx80(80, aSign, 
+                          bSign ? -0x3FFF : 0x7FFF, aSig, 0, status);
+    }
+    if (bExp < 0x3FFF) return a;  /* returns the original `a', not the normalized aSig/aExp */
+
+    int shiftCount = 0x403E - bExp;  /* bExp is 0x3FFF..0x400E here, so the shift is 48..63 */
+    bSig >>= shiftCount;  /* at most 16 significant bits remain */
+    Bit32s scale = bSig;
+    if (bSign) scale = -scale; /* magnitude is at most 65535, i.e. -65535..65535 */
+    return
+        roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status);
+}
+
+/*----------------------------------------------------------------------------
+| Classifies the extended-precision floating-point value `a' as zero,
+| denormal (including pseudo-denormals), NaN, signed infinity or normalized.
+| Encodings with a non-zero exponent field whose most significant
+| significand bit is clear are unsupported and are reported as NaN.
+*----------------------------------------------------------------------------*/
+
+float_class_t floatx80_class(floatx80 a)
+{
+   const Bit64u significand = extractFloatx80Frac(a);
+   const Bit32s biasedExp = extractFloatx80Exp(a);
+
+   /* zero exponent field: true zero, denormal or pseudo-denormal */
+   if (biasedExp == 0)
+       return (significand == 0) ? float_zero : float_denormal;
+
+   /* every other valid encoding has the explicit integer bit set */
+   if ((significand & BX_CONST64(0x8000000000000000)) == 0)
+       return float_NaN;
+
+   if (biasedExp != 0x7fff)
+       return float_normalized;
+
+   /* maximum exponent: any bit below the integer bit makes it a NaN */
+   if (((Bit64u) (significand << 1)) != 0)
+       return float_NaN;
+
+   return extractFloatx80Sign(a) ? float_negative_inf : float_positive_inf;
+}
+
+/*----------------------------------------------------------------------------
+| Ordered comparison of two extended precision floating point numbers.
+| Returns 'float_relation_equal' when the operands are equal (zeros of either
+| sign compare equal), 'float_relation_less' when `a' is less than `b',
+| 'float_relation_greater' when `a' is greater than `b', and
+| 'float_relation_unordered' when either operand is classified as a NaN
+| (which includes unsupported encodings); in that case the invalid flag is
+| raised.  The denormal flag is raised when either operand is a denormal.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80 a, floatx80 b, float_status_t &status)
+{
+    const float_class_t classA = floatx80_class(a);
+    const float_class_t classB = floatx80_class(b);
+
+    if ((classA == float_NaN) || (classB == float_NaN)) {
+        float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if ((classA == float_denormal) || (classB == float_denormal))
+        float_raise(status, float_flag_denormal);
+
+    /* bit-for-bit identical operands, or zeros of any sign */
+    const int identical = (a.exp == b.exp) && (a.fraction == b.fraction);
+    const int bothZero  = (classA == float_zero) && (classB == float_zero);
+    if (identical || bothZero)
+        return float_relation_equal;
+
+    const int signA = extractFloatx80Sign(a);
+    if (signA != extractFloatx80Sign(b))
+        return signA ? float_relation_less : float_relation_greater;
+
+    /* same sign: compare as 128-bit quantities, swapped for negatives */
+    int aBelowB;
+    if (signA)
+        aBelowB = lt128(b.exp, b.fraction, a.exp, a.fraction);
+    else
+        aBelowB = lt128(a.exp, a.fraction, b.exp, b.fraction);
+
+    return aBelowB ? float_relation_less : float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Quiet comparison of two extended precision floating point numbers.
+| Returns 'float_relation_equal' when the operands are equal (zeros of either
+| sign compare equal), 'float_relation_less' when `a' is less than `b',
+| 'float_relation_greater' when `a' is greater than `b', and
+| 'float_relation_unordered' when either operand is classified as a NaN.
+| Quiet NaNs do not cause an exception: the invalid flag is raised only for
+| unsupported encodings and signaling NaNs.  The denormal flag is raised
+| when either operand is a denormal.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status_t &status)
+{
+    const float_class_t classA = floatx80_class(a);
+    const float_class_t classB = floatx80_class(b);
+
+    if ((classA == float_NaN) || (classB == float_NaN)) {
+        /* unsupported encodings are classified as NaN but are invalid */
+        if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) {
+            float_raise(status, float_flag_invalid);
+        }
+        /* signaling NaNs are invalid as well; quiet NaNs stay silent */
+        if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b)) {
+            float_raise(status, float_flag_invalid);
+        }
+        return float_relation_unordered;
+    }
+
+    if ((classA == float_denormal) || (classB == float_denormal))
+        float_raise(status, float_flag_denormal);
+
+    /* bit-for-bit identical operands, or zeros of any sign */
+    const int identical = (a.exp == b.exp) && (a.fraction == b.fraction);
+    const int bothZero  = (classA == float_zero) && (classB == float_zero);
+    if (identical || bothZero)
+        return float_relation_equal;
+
+    const int signA = extractFloatx80Sign(a);
+    if (signA != extractFloatx80Sign(b))
+        return signA ? float_relation_less : float_relation_greater;
+
+    /* same sign: compare as 128-bit quantities, swapped for negatives */
+    int aBelowB;
+    if (signA)
+        aBelowB = lt128(b.exp, b.fraction, a.exp, a.fraction);
+    else
+        aBelowB = lt128(a.exp, a.fraction, b.exp, b.fraction);
+
+    return aBelowB ? float_relation_less : float_relation_greater;
+}
--- a/bochs/fpu/softfloatx80.h
+++ b/bochs/fpu/softfloatx80.h
@ -0,0 +1,96 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 architecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman (gate at fidonet.org.il)
+ * ==========================================================================*/ 
+
+#ifndef SOFTFLOATX80_EXTENSIONS_H
+#define SOFTFLOATX80_EXTENSIONS_H
+
+#include "softfloat.h"
+#include "softfloat-specialize.h"
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision to 16-bit integer conversion
+| routines.  Both return the integer indefinite value and raise the invalid
+| flag when the operand is unsupported or the result does not fit.
+*----------------------------------------------------------------------------*/
+
+Bit16s floatx80_to_int16(floatx80, float_status_t &status);
+Bit16s floatx80_to_int16_round_to_zero(floatx80, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+|
+| floatx80_extract modifies its first argument in place (it receives the
+| significand) and returns the exponent.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_extract(floatx80 &a, float_status_t &status);
+float_class_t floatx80_class(floatx80);
+floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status);
+floatx80 floatx80_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status);  /* NOTE(review): `q' is presumably an output for the quotient - confirm in the implementation */
+floatx80 floatx80_ieee754_remainder(floatx80 a, floatx80 b, Bit64u &q, float_status_t &status);  /* NOTE(review): same `q' assumption as above - confirm */
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision compare.
+|
+| Both return one of the float_relation_* values.  floatx80_compare raises
+| invalid for any NaN operand; floatx80_compare_quiet only for unsupported
+| encodings and signaling NaNs.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80, floatx80, float_status_t &status);
+int floatx80_compare_quiet(floatx80, floatx80, float_status_t &status);
+
+/*-----------------------------------------------------------------------------
+| Replaces the extended double-precision floating-point value `reg' with its
+| absolute value by clearing the sign bit (bit 15 of the `exp' word); the
+| exponent bits and the significand are left untouched.  The operand is
+| modified in place and a reference to it is returned.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80& floatx80_abs(floatx80 &reg)
+{
+    /* keep the 15 exponent bits, drop the sign */
+    reg.exp = reg.exp & 0x7FFF;
+    return reg;
+}
+
+/*-----------------------------------------------------------------------------
+| Negates the extended double-precision floating-point value `reg' by
+| toggling the sign bit (bit 15 of the `exp' word); the exponent bits and the
+| significand are left untouched.  The operand is modified in place and a
+| reference to it is returned.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE floatx80& floatx80_chs(floatx80 &reg)
+{
+    /* flip the sign, leave the 15 exponent bits alone */
+    reg.exp = reg.exp ^ 0x8000;
+    return reg;
+}
+
+/*-----------------------------------------------------------------------------
+| Commonly used extended double-precision floating-point constants.
+|
+| Only the declarations live here; the values are defined elsewhere.
+| NOTE(review): judging by the names these are presumably a quiet NaN, zero,
+| one, log2(10), log2(e), pi, pi/2, pi/4, log10(2), ln(2) and infinity -
+| confirm against the definitions.
+*----------------------------------------------------------------------------*/
+
+extern const floatx80 Const_QNaN;
+extern const floatx80 Const_Z;
+extern const floatx80 Const_1;
+extern const floatx80 Const_L2T;
+extern const floatx80 Const_L2E;
+extern const floatx80 Const_PI;
+extern const floatx80 Const_PI2;
+extern const floatx80 Const_PI4;
+extern const floatx80 Const_LG2;
+extern const floatx80 Const_LN2;
+extern const floatx80 Const_INF;
+
+#endif
--- a/bochs/fpu/tag_w.h
+++ b/bochs/fpu/tag_w.h
@ -0,0 +1,35 @@
+/////////////////////////////////////////////////////////////////////////
+//
+//   Copyright (c) 2004 Stanislav Shwartsman
+//          Written by Stanislav Shwartsman <gate at fidonet.org.il>
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+
+#ifndef _TAG_W_H
+#define _TAG_W_H
+
+/* Tag Word
+ *
+ * Two-bit tag values, one per FPU register.  The names follow the x87 tag
+ * word encoding (valid / zero / special / empty).
+ * NOTE(review): this header uses FPU_REG and BX_CPP_AttrRegparmN without
+ * including anything itself, so it presumably relies on the includer having
+ * declared them first - confirm.
+ */
+#define FPU_Tag_Valid   0x00
+#define FPU_Tag_Zero    0x01
+#define FPU_Tag_Special 0x02
+#define FPU_Tag_Empty   0x03
+
+#ifdef __cplusplus
+extern "C" 
+#endif
+int FPU_tagof(FPU_REG *reg) BX_CPP_AttrRegparmN(1);  /* C linkage when compiled as C++; presumably returns one of the FPU_Tag_* values for `reg' - confirm */
+
+#endif
--- a/bochs/fpu/todo
+++ b/bochs/fpu/todo
@ -0,0 +1,15 @@
+TODO:
+
+1. Check for denormal and pseudodenormal operands in ALL instructions
+
+2. On unmasked underflow the result should be corrected by the magic
+   number for all operations, including float32 and float64
+
+3. Instructions that are still implemented using the old FPU library:
+	FSIN, FCOS, FSINCOS, FPTAN, FPATAN, FYL2XP1, F2XM1, FYL2X
+
+4. Set SW_C1 according to PRECISION_UP or PRECISION_DOWN conditions
+
+5. The float_tininess_before_rounding mode is deprecated; remove it from
+   the softfloat code
+