Fast table-based implementation of reciprocal (RCPSS/RCPPS)

This implementation is much clearer than the old one. RSQRTSS/RSQRTPS are coming soon.
2003-12-30 23:06:59 +00:00 · 2003-12-30 23:06:59 +00:00 · 52d75d7aed
commit 52d75d7aed
parent e7e0b40bd1
7 changed files with 546 additions and 243 deletions
--- a/bochs/cpu/3dnow.cc
+++ b/bochs/cpu/3dnow.cc
@ -29,7 +29,6 @@
 static void prepare_softfloat_status_word
 	(softfloat_status_word_t &status, int rounding_mode)
 {
-  status.float_precision = 32;
  status.float_detect_tininess = float_tininess_before_rounding;
  status.float_exception_flags = 0; // clear exceptions before execution
  status.float_nan_handling_mode = float_first_operand_nan;
--- a/bochs/cpu/Makefile.in
+++ b/bochs/cpu/Makefile.in
@ -68,6 +68,7 @@ OBJS = \
 	sse.o \
 	sse_move.o \
 	sse_pfp.o \
+	sse_rcp.o \
 	soft_int.o \
 	io_pro.o \
 	$(APIC_OBJS) \
@ -939,6 +940,19 @@ sse_pfp.o: sse_pfp.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/de
  ../iodev/serial.h ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
  ../iodev/guest2host.h ../iodev/slowdown_timer.h ../iodev/extfpuirq.h \
  ../instrument/stubs/instrument.h softfloat.h
+sse_rcp.o: sse_rcp.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
+  ../bxversion.h ../gui/siminterface.h ../state_file.h ../cpu/cpu.h \
+  ../cpu/lazy_flags.h ../cpu/i387.h ../cpu/xmm.h ../memory/memory.h \
+  ../pc_system.h ../plugin.h ../extplugin.h ../gui/gui.h \
+  ../gui/textconfig.h ../gui/keymap.h ../iodev/iodev.h ../iodev/pci.h \
+  ../iodev/pci2isa.h ../iodev/pcivga.h ../iodev/vga.h ../iodev/ioapic.h \
+  ../iodev/biosdev.h ../iodev/cmos.h ../iodev/dma.h ../iodev/floppy.h \
+  ../iodev/harddrv.h ../iodev/cdrom.h ../iodev/vmware3.h \
+  ../iodev/keyboard.h ../iodev/parallel.h ../iodev/pic.h ../iodev/pit.h \
+  ../iodev/pit_wrap.h ../iodev/pit82c54.h ../iodev/virt_timer.h \
+  ../iodev/serial.h ../iodev/unmapped.h ../iodev/eth.h ../iodev/ne2k.h \
+  ../iodev/guest2host.h ../iodev/slowdown_timer.h ../iodev/extfpuirq.h \
+  ../instrument/stubs/instrument.h softfloat.h softfloat-specialize.h
 stack16.o: stack16.@CPP_SUFFIX@ ../bochs.h ../config.h ../osdep.h ../bx_debug/debug.h \
  ../bxversion.h ../gui/siminterface.h ../state_file.h ../cpu/cpu.h \
  ../cpu/lazy_flags.h ../cpu/i387.h ../cpu/xmm.h ../memory/memory.h \
--- a/bochs/cpu/softfloat-specialize.h
+++ b/bochs/cpu/softfloat-specialize.h
@ -75,15 +75,6 @@ BX_CPP_INLINE int get_flush_underflow_to_zero(float_status_t &status)
    return status.flush_underflow_to_zero;
 }

-/*----------------------------------------------------------------------------
-| Returns current floating point precision.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE int get_float_precision(float_status_t &status)
-{
-    return status.float_precision;
-}
-
 /*----------------------------------------------------------------------------
 | Internal canonical NaN format.
 *----------------------------------------------------------------------------*/
@ -97,10 +88,59 @@ typedef struct {
 | The pattern for a default generated single-precision NaN.
 *----------------------------------------------------------------------------*/
 #define float32_default_nan 0xFFC00000
-/*        in another version
+/*
 #define float32_default_nan 0x7FFFFFFF
 */

+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a)
+{
+    return a & 0x007FFFFF;
+}
+
+#define float32_fraction extractFloat32Frac
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a)
+{
+    return (a>>23) & 0xFF;
+}
+
+#define float32_exp extractFloat32Exp
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE flag extractFloat32Sign(float32 a)
+{
+    return a>>31;
+}
+
+#define float32_sign extractFloat32Sign
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float32 packFloat32(flag zSign, Bit16s zExp, Bit32u zSig)
+{
+    return (((Bit32u) zSign)<<31) + (((Bit32u) zExp)<<23) + zSig;
+}
+
 /*----------------------------------------------------------------------------
 | Returns 1 if the single-precision floating-point value `a' is a NaN;
 | otherwise returns 0.
@ -189,10 +229,59 @@ static float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status)
 | The pattern for a default generated double-precision NaN.
 *----------------------------------------------------------------------------*/
 #define float64_default_nan BX_CONST64(0xFFF8000000000000)
-/*                in another version
+/*
 #define float64_default_nan BX_CONST64(0x7FFFFFFFFFFFFFFF)
 */

+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a)
+{
+    return a & BX_CONST64(0x000FFFFFFFFFFFFF);
+}
+
+#define float64_fraction extractFloat64Frac
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a)
+{
+    return (a>>52) & 0x7FF;
+}
+
+#define float64_exp extractFloat64Exp
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE flag extractFloat64Sign(float64 a)
+{
+    return a>>63;
+}
+
+#define float64_sign extractFloat64Sign
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result.  After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result.  This means that any integer portion of `zSig'
+| will be added into the exponent.  Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+BX_CPP_INLINE float64 packFloat64(flag zSign, Bit16s zExp, Bit64u zSig)
+{
+    return (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<52) + zSig;
+}
+
 /*----------------------------------------------------------------------------
 | Returns 1 if the double-precision floating-point value `a' is a NaN;
 | otherwise returns 0.
--- a/bochs/cpu/softfloat.cc
+++ b/bochs/cpu/softfloat.cc
@ -150,33 +150,6 @@ static Bit64s roundAndPackInt64(flag zSign, Bit64u absZ0, Bit64u absZ1, float_st
    return z;
 }

-/*----------------------------------------------------------------------------
-| Returns the fraction bits of the single-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE Bit32u extractFloat32Frac(float32 a)
-{
-    return a & 0x007FFFFF;
-}
-
-/*----------------------------------------------------------------------------
-| Returns the exponent bits of the single-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE Bit16s extractFloat32Exp(float32 a)
-{
-    return (a>>23) & 0xFF;
-}
-
-/*----------------------------------------------------------------------------
-| Returns the sign bit of the single-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE flag extractFloat32Sign(float32 a)
-{
-    return a>>31;
-}
-
 /*----------------------------------------------------------------------------
 | Determine single-precision floating-point number class
 *----------------------------------------------------------------------------*/
@ -219,22 +192,6 @@ static void
    *zExpPtr = 1 - shiftCount;
 }

-/*----------------------------------------------------------------------------
-| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
-| single-precision floating-point value, returning the result.  After being
-| shifted into the proper positions, the three fields are simply added
-| together to form the result.  This means that any integer portion of `zSig'
-| will be added into the exponent.  Since a properly normalized significand
-| will have an integer portion equal to 1, the `zExp' input should be 1 less
-| than the desired result exponent whenever `zSig' is a complete, normalized
-| significand.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE float32 packFloat32(flag zSign, Bit16s zExp, Bit32u zSig)
-{
-    return (((Bit32u) zSign)<<31) + (((Bit32u) zExp)<<23) + zSig;
-}
-
 /*----------------------------------------------------------------------------
 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 | and significand `zSig', and returns the proper single-precision floating-
@ -264,14 +221,8 @@ static float32 roundAndPackFloat32(flag zSign, Bit16s zExp, Bit32u zSig, float_s

    roundingMode = get_float_rounding_mode(status);
    int roundNearestEven = (roundingMode == float_round_nearest_even);
-
-    if(get_float_precision(status) == 12) {
-        roundIncrement = 0x20000;
-        roundMask = 0x3FFFF;
-    } else {
-	roundIncrement = 0x40;
-	roundMask = 0x7F;
-    }
+    roundIncrement = 0x40;
+    roundMask = 0x7F;

    if (! roundNearestEven) {
        if (roundingMode == float_round_to_zero) {
@ -336,33 +287,6 @@ static float32
    return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount, status);
 }

-/*----------------------------------------------------------------------------
-| Returns the fraction bits of the double-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE Bit64u extractFloat64Frac(float64 a)
-{
-    return a & BX_CONST64(0x000FFFFFFFFFFFFF);
-}
-
-/*----------------------------------------------------------------------------
-| Returns the exponent bits of the double-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE Bit16s extractFloat64Exp(float64 a)
-{
-    return (a>>52) & 0x7FF;
-}
-
-/*----------------------------------------------------------------------------
-| Returns the sign bit of the double-precision floating-point value `a'.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE flag extractFloat64Sign(float64 a)
-{
-    return a>>63;
-}
-
 /*----------------------------------------------------------------------------
 | Determine double-precision floating-point number class
 *----------------------------------------------------------------------------*/
@ -405,22 +329,6 @@ static void
    *zExpPtr = 1 - shiftCount;
 }

-/*----------------------------------------------------------------------------
-| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
-| double-precision floating-point value, returning the result.  After being
-| shifted into the proper positions, the three fields are simply added
-| together to form the result.  This means that any integer portion of `zSig'
-| will be added into the exponent.  Since a properly normalized significand
-| will have an integer portion equal to 1, the `zExp' input should be 1 less
-| than the desired result exponent whenever `zSig' is a complete, normalized
-| significand.
-*----------------------------------------------------------------------------*/
-
-BX_CPP_INLINE float64 packFloat64(flag zSign, Bit16s zExp, Bit64u zSig)
-{
-    return (((Bit64u) zSign)<<63) + (((Bit64u) zExp)<<52) + zSig;
-}
-
 /*----------------------------------------------------------------------------
 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
 | and significand `zSig', and returns the proper double-precision floating-
--- a/bochs/cpu/softfloat.h
+++ b/bochs/cpu/softfloat.h
@ -34,6 +34,9 @@ these four paragraphs for those parts of this code that are retained.

 #include <config.h>      /* generated by configure script from config.h.in */

+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE floating-point types.
 *----------------------------------------------------------------------------*/
@ -106,7 +109,6 @@ enum {
 *----------------------------------------------------------------------------*/
 struct float_status_t 
 {
-    int float_precision;		/* 12/32/64/80 bit precision */
    int float_detect_tininess;
    int float_rounding_mode;
    int float_exception_flags;
@ -201,3 +203,5 @@ int float64_compare_quiet(float64, float64, float_status_t &status);

 float_class_t float64_class(float64);
 int float64_is_signaling_nan(float64);
+
+#endif
--- a/bochs/cpu/sse_pfp.cc
+++ b/bochs/cpu/sse_pfp.cc
@ -41,9 +41,8 @@ void BX_CPU_C::check_exceptionsSSE(int exceptions_flags)
  }
 }

-static void mxcsr_to_softfloat_status_word(softfloat_status_word_t &status, bx_mxcsr_t mxcsr, unsigned precision = 32)
+static void mxcsr_to_softfloat_status_word(softfloat_status_word_t &status, bx_mxcsr_t mxcsr)
 {
-  status.float_precision = precision; // affects only float32 and float80 operations
  status.float_detect_tininess = float_tininess_before_rounding;
  status.float_exception_flags = 0; // clear exceptions before execution
  status.float_nan_handling_mode = float_first_operand_nan;
@ -53,54 +52,6 @@ static void mxcsr_to_softfloat_status_word(softfloat_status_word_t &status, bx_m
       (mxcsr.get_flush_masked_underflow() && mxcsr.get_UM()) ? 1 : 0;
 }

-BX_CPP_INLINE Float32 convert_to_QNaN(Float32 op)
-{
-  return op | 0x00400000;
-}
-
-// approximate reciprocal of scalar single precision FP
-static Float32 approximate_reciprocal(Float32 op)
-{
-  softfloat_status_word_t status_word;
-  float_class_t op_class = float32_class(op);
-
-  static const Float32 one = 0x3F800000;
-  Float32 result;
-
-  if (op_class == float_NaN)
-  {
-    return convert_to_QNaN(op);
-  } 
-  else {
-    if (op_class == float_denormal)
-    {
-      op &= ((Bit32u)(1) << 31);
-    } 
-    else if(op_class == float_normalized)
-    {
-      /*
-       * for Katmai, a one will be placed in the 12th bit after decimal
-       *  point, and the lower bits will be cleared.
-       */
-      op &= 0xFFFFF000;
-      op |= 0x00000800;
-    }
-
-    /* 
-     * Calculate (1/1.yyyyyyyyyyy1), the result is always rounded to the 
-     *  12th bit after the decimal point by round-to-nearest, regardless
-     *  of the current rounding mode. 
-    */
-
-    mxcsr_to_softfloat_status_word(status_word, 
-            bx_mxcsr_t(MXCSR_FLUSH_MASKED_UNDERFLOW | MXCSR_UM), 12);
-
-    result = float32_div(one, op, status_word);
-  }
-  
-  return result;
-}
-
 // handle DAZ
 static float32 handleDAZ(float32 op)
 {
@ -1460,92 +1411,6 @@ void BX_CPU_C::SQRTSS_VssWss(bxInstruction_c *i)
 #endif
 }

-void BX_CPU_C::RSQRTPS_VpsWps(bxInstruction_c *i)
-{
-#if BX_SUPPORT_SSE >= 1
-  BX_CPU_THIS_PTR prepareSSE();
-
-  BX_PANIC(("RSQRTPS_VpsWps: SSE instruction still not implemented"));
-#else
-  BX_INFO(("RSQRTPS_VpsWps: required SSE, use --enable-sse option"));
-  UndefinedOpcode(i);
-#endif
-}
-
-void BX_CPU_C::RSQRTSS_VssWss(bxInstruction_c *i)
-{
-#if BX_SUPPORT_SSE >= 1
-  BX_CPU_THIS_PTR prepareSSE();
-
-  BX_PANIC(("RSQRTSS_VssWss: SSE instruction still not implemented"));
-#else
-  BX_INFO(("RSQRTSS_VssWss: required SSE, use --enable-sse option"));
-  UndefinedOpcode(i);
-#endif
-}
-
-/* 
- * Opcode: 0F 53
- * Approximate reciprocals of packed single precision FP values from XMM2/MEM.
- * Possible floating point exceptions: -
- */
-void BX_CPU_C::RCPPS_VpsWps(bxInstruction_c *i)
-{
-#if BX_SUPPORT_SSE >= 1
-  BX_CPU_THIS_PTR prepareSSE();
-  BxPackedXmmRegister op;
-
-  /* op is a register or memory reference */
-  if (i->modC0()) {
-    op = BX_READ_XMM_REG(i->rm());
-  }
-  else {
-    /* pointer, segment address pair */
-    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &op);
-  }
-
-  op.xmm32u(0) = approximate_reciprocal(op.xmm32u(0));
-  op.xmm32u(1) = approximate_reciprocal(op.xmm32u(1));
-  op.xmm32u(2) = approximate_reciprocal(op.xmm32u(2));
-  op.xmm32u(3) = approximate_reciprocal(op.xmm32u(3));
-
-  BX_WRITE_XMM_REG(i->nnn(), op);
-
-#else
-  BX_INFO(("RCPPS_VpsWps: required SSE, use --enable-sse option"));
-  UndefinedOpcode(i);
-#endif
-}
-
-/* 
- * Opcode: F3 0F 53
- * Approximate reciprocal of scalar single precision FP value from XMM2/MEM32.
- * Possible floating point exceptions: -
- */
-void BX_CPU_C::RCPSS_VssWss(bxInstruction_c *i)
-{
-#if BX_SUPPORT_SSE >= 1
-  BX_CPU_THIS_PTR prepareSSE();
-  Float32 op;
-
-  /* op is a register or memory reference */
-  if (i->modC0()) {
-    op = BX_READ_XMM_REG_LO_DWORD(i->rm());
-  }
-  else {
-    /* pointer, segment address pair */
-    read_virtual_dword(i->seg(), RMAddr(i), &op);
-  }
-
-  Float32 result = approximate_reciprocal(op);
-  BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), result);
-
-#else
-  BX_INFO(("RCPSS_VssWss: required SSE, use --enable-sse option"));
-  UndefinedOpcode(i);
-#endif
-}
-
 /* 
 * Opcode: 0F 58
 * Add packed single precision FP numbers from XMM2/MEM to XMM1.
--- a/bochs/cpu/sse_rcp.cc
+++ b/bochs/cpu/sse_rcp.cc
@ -0,0 +1,424 @@
+/////////////////////////////////////////////////////////////////////////
+//
+//   Copyright (c) 2002 Stanislav Shwartsman
+//          Written by Stanislav Shwartsman <gate@fidonet.org.il>
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+//
+
+#define NEED_CPU_REG_SHORTCUTS 1
+#include "bochs.h"
+#define LOG_THIS BX_CPU_THIS_PTR
+
+#include "softfloat.h"
+#include "softfloat-specialize.h"
+
+
+#if BX_SUPPORT_SSE
+
+BX_CPP_INLINE Float32 convert_to_QNaN(Float32 op)
+{
+  return op | 0x00400000;
+}
+
+static Bit16u rcp_table[2048] = {
+    0x7ff0, 0x7fd0, 0x7fb0, 0x7f90, 0x7f70, 0x7f50, 0x7f30, 0x7f10, 
+    0x7ef0, 0x7ed0, 0x7eb0, 0x7e90, 0x7e70, 0x7e50, 0x7e30, 0x7e10, 
+    0x7df8, 0x7dd8, 0x7db8, 0x7d98, 0x7d78, 0x7d58, 0x7d38, 0x7d18, 
+    0x7cf8, 0x7cd8, 0x7cb8, 0x7c98, 0x7c80, 0x7c60, 0x7c40, 0x7c20, 
+    0x7c00, 0x7be0, 0x7bc0, 0x7ba0, 0x7b88, 0x7b68, 0x7b48, 0x7b28, 
+    0x7b08, 0x7ae8, 0x7ac8, 0x7ab0, 0x7a90, 0x7a70, 0x7a50, 0x7a30, 
+    0x7a10, 0x79f8, 0x79d8, 0x79b8, 0x7998, 0x7978, 0x7960, 0x7940, 
+    0x7920, 0x7900, 0x78e0, 0x78c8, 0x78a8, 0x7888, 0x7868, 0x7850, 
+    0x7830, 0x7810, 0x77f0, 0x77d8, 0x77b8, 0x7798, 0x7778, 0x7760, 
+    0x7740, 0x7720, 0x7700, 0x76e8, 0x76c8, 0x76a8, 0x7690, 0x7670, 
+    0x7650, 0x7630, 0x7618, 0x75f8, 0x75d8, 0x75c0, 0x75a0, 0x7580, 
+    0x7568, 0x7548, 0x7528, 0x7510, 0x74f0, 0x74d0, 0x74b8, 0x7498, 
+    0x7478, 0x7460, 0x7440, 0x7420, 0x7408, 0x73e8, 0x73d0, 0x73b0, 
+    0x7390, 0x7378, 0x7358, 0x7338, 0x7320, 0x7300, 0x72e8, 0x72c8, 
+    0x72a8, 0x7290, 0x7270, 0x7258, 0x7238, 0x7220, 0x7200, 0x71e0, 
+    0x71c8, 0x71a8, 0x7190, 0x7170, 0x7158, 0x7138, 0x7118, 0x7100, 
+    0x70e0, 0x70c8, 0x70a8, 0x7090, 0x7070, 0x7058, 0x7038, 0x7020, 
+    0x7000, 0x6fe8, 0x6fc8, 0x6fb0, 0x6f90, 0x6f78, 0x6f58, 0x6f40, 
+    0x6f20, 0x6f08, 0x6ee8, 0x6ed0, 0x6eb0, 0x6e98, 0x6e78, 0x6e60, 
+    0x6e40, 0x6e28, 0x6e08, 0x6df0, 0x6dd0, 0x6db8, 0x6da0, 0x6d80, 
+    0x6d68, 0x6d48, 0x6d30, 0x6d10, 0x6cf8, 0x6cd8, 0x6cc0, 0x6ca8, 
+    0x6c88, 0x6c70, 0x6c50, 0x6c38, 0x6c20, 0x6c00, 0x6be8, 0x6bc8, 
+    0x6bb0, 0x6b98, 0x6b78, 0x6b60, 0x6b40, 0x6b28, 0x6b10, 0x6af0, 
+    0x6ad8, 0x6ac0, 0x6aa0, 0x6a88, 0x6a70, 0x6a50, 0x6a38, 0x6a20, 
+    0x6a00, 0x69e8, 0x69c8, 0x69b0, 0x6998, 0x6978, 0x6960, 0x6948, 
+    0x6930, 0x6910, 0x68f8, 0x68e0, 0x68c0, 0x68a8, 0x6890, 0x6870, 
+    0x6858, 0x6840, 0x6820, 0x6808, 0x67f0, 0x67d8, 0x67b8, 0x67a0, 
+    0x6788, 0x6770, 0x6750, 0x6738, 0x6720, 0x6700, 0x66e8, 0x66d0, 
+    0x66b8, 0x6698, 0x6680, 0x6668, 0x6650, 0x6638, 0x6618, 0x6600, 
+    0x65e8, 0x65d0, 0x65b0, 0x6598, 0x6580, 0x6568, 0x6550, 0x6530, 
+    0x6518, 0x6500, 0x64e8, 0x64c8, 0x64b0, 0x6498, 0x6480, 0x6468, 
+    0x6450, 0x6430, 0x6418, 0x6400, 0x63e8, 0x63d0, 0x63b8, 0x6398, 
+    0x6380, 0x6368, 0x6350, 0x6338, 0x6320, 0x6300, 0x62e8, 0x62d0, 
+    0x62b8, 0x62a0, 0x6288, 0x6270, 0x6250, 0x6238, 0x6220, 0x6208, 
+    0x61f0, 0x61d8, 0x61c0, 0x61a8, 0x6190, 0x6170, 0x6158, 0x6140, 
+    0x6128, 0x6110, 0x60f8, 0x60e0, 0x60c8, 0x60b0, 0x6098, 0x6080, 
+    0x6060, 0x6048, 0x6030, 0x6018, 0x6000, 0x5fe8, 0x5fd0, 0x5fb8, 
+    0x5fa0, 0x5f88, 0x5f70, 0x5f58, 0x5f40, 0x5f28, 0x5f10, 0x5ef8, 
+    0x5ee0, 0x5ec8, 0x5eb0, 0x5e98, 0x5e80, 0x5e68, 0x5e50, 0x5e30, 
+    0x5e18, 0x5e00, 0x5de8, 0x5dd0, 0x5db8, 0x5da0, 0x5d88, 0x5d70, 
+    0x5d58, 0x5d40, 0x5d30, 0x5d18, 0x5d00, 0x5ce8, 0x5cd0, 0x5cb8, 
+    0x5ca0, 0x5c88, 0x5c70, 0x5c58, 0x5c40, 0x5c28, 0x5c10, 0x5bf8, 
+    0x5be0, 0x5bc8, 0x5bb0, 0x5b98, 0x5b80, 0x5b68, 0x5b50, 0x5b38, 
+    0x5b20, 0x5b08, 0x5af8, 0x5ae0, 0x5ac8, 0x5ab0, 0x5a98, 0x5a80, 
+    0x5a68, 0x5a50, 0x5a38, 0x5a20, 0x5a08, 0x59f8, 0x59e0, 0x59c8, 
+    0x59b0, 0x5998, 0x5980, 0x5968, 0x5950, 0x5938, 0x5928, 0x5910, 
+    0x58f8, 0x58e0, 0x58c8, 0x58b0, 0x5898, 0x5880, 0x5870, 0x5858, 
+    0x5840, 0x5828, 0x5810, 0x57f8, 0x57e0, 0x57d0, 0x57b8, 0x57a0, 
+    0x5788, 0x5770, 0x5758, 0x5748, 0x5730, 0x5718, 0x5700, 0x56e8, 
+    0x56d0, 0x56c0, 0x56a8, 0x5690, 0x5678, 0x5660, 0x5650, 0x5638, 
+    0x5620, 0x5608, 0x55f0, 0x55e0, 0x55c8, 0x55b0, 0x5598, 0x5588, 
+    0x5570, 0x5558, 0x5540, 0x5528, 0x5518, 0x5500, 0x54e8, 0x54d0, 
+    0x54c0, 0x54a8, 0x5490, 0x5478, 0x5468, 0x5450, 0x5438, 0x5420, 
+    0x5410, 0x53f8, 0x53e0, 0x53c8, 0x53b8, 0x53a0, 0x5388, 0x5370, 
+    0x5360, 0x5348, 0x5330, 0x5318, 0x5308, 0x52f0, 0x52d8, 0x52c8, 
+    0x52b0, 0x5298, 0x5280, 0x5270, 0x5258, 0x5240, 0x5230, 0x5218, 
+    0x5200, 0x51f0, 0x51d8, 0x51c0, 0x51b0, 0x5198, 0x5180, 0x5170, 
+    0x5158, 0x5140, 0x5128, 0x5118, 0x5100, 0x50e8, 0x50d8, 0x50c0, 
+    0x50a8, 0x5098, 0x5080, 0x5070, 0x5058, 0x5040, 0x5030, 0x5018, 
+    0x5000, 0x4ff0, 0x4fd8, 0x4fc0, 0x4fb0, 0x4f98, 0x4f80, 0x4f70, 
+    0x4f58, 0x4f48, 0x4f30, 0x4f18, 0x4f08, 0x4ef0, 0x4ee0, 0x4ec8, 
+    0x4eb0, 0x4ea0, 0x4e88, 0x4e78, 0x4e60, 0x4e48, 0x4e38, 0x4e20, 
+    0x4e10, 0x4df8, 0x4de0, 0x4dd0, 0x4db8, 0x4da8, 0x4d90, 0x4d78, 
+    0x4d68, 0x4d50, 0x4d40, 0x4d28, 0x4d18, 0x4d00, 0x4ce8, 0x4cd8, 
+    0x4cc0, 0x4cb0, 0x4c98, 0x4c88, 0x4c70, 0x4c60, 0x4c48, 0x4c30, 
+    0x4c20, 0x4c08, 0x4bf8, 0x4be0, 0x4bd0, 0x4bb8, 0x4ba8, 0x4b90, 
+    0x4b80, 0x4b68, 0x4b58, 0x4b40, 0x4b30, 0x4b18, 0x4b08, 0x4af0, 
+    0x4ad8, 0x4ac8, 0x4ab0, 0x4aa0, 0x4a88, 0x4a78, 0x4a60, 0x4a50, 
+    0x4a38, 0x4a28, 0x4a10, 0x4a00, 0x49e8, 0x49d8, 0x49c8, 0x49b0, 
+    0x49a0, 0x4988, 0x4978, 0x4960, 0x4950, 0x4938, 0x4928, 0x4910, 
+    0x4900, 0x48e8, 0x48d8, 0x48c0, 0x48b0, 0x4898, 0x4888, 0x4878, 
+    0x4860, 0x4850, 0x4838, 0x4828, 0x4810, 0x4800, 0x47e8, 0x47d8, 
+    0x47c8, 0x47b0, 0x47a0, 0x4788, 0x4778, 0x4760, 0x4750, 0x4740, 
+    0x4728, 0x4718, 0x4700, 0x46f0, 0x46d8, 0x46c8, 0x46b8, 0x46a0, 
+    0x4690, 0x4678, 0x4668, 0x4658, 0x4640, 0x4630, 0x4618, 0x4608, 
+    0x45f8, 0x45e0, 0x45d0, 0x45b8, 0x45a8, 0x4598, 0x4580, 0x4570, 
+    0x4560, 0x4548, 0x4538, 0x4520, 0x4510, 0x4500, 0x44e8, 0x44d8, 
+    0x44c8, 0x44b0, 0x44a0, 0x4488, 0x4478, 0x4468, 0x4450, 0x4440, 
+    0x4430, 0x4418, 0x4408, 0x43f8, 0x43e0, 0x43d0, 0x43c0, 0x43a8, 
+    0x4398, 0x4388, 0x4370, 0x4360, 0x4350, 0x4338, 0x4328, 0x4318, 
+    0x4300, 0x42f0, 0x42e0, 0x42c8, 0x42b8, 0x42a8, 0x4290, 0x4280, 
+    0x4270, 0x4260, 0x4248, 0x4238, 0x4228, 0x4210, 0x4200, 0x41f0, 
+    0x41d8, 0x41c8, 0x41b8, 0x41a8, 0x4190, 0x4180, 0x4170, 0x4158, 
+    0x4148, 0x4138, 0x4128, 0x4110, 0x4100, 0x40f0, 0x40d8, 0x40c8, 
+    0x40b8, 0x40a8, 0x4090, 0x4080, 0x4070, 0x4060, 0x4048, 0x4038, 
+    0x4028, 0x4018, 0x4000, 0x3ff0, 0x3fe0, 0x3fd0, 0x3fb8, 0x3fa8, 
+    0x3f98, 0x3f88, 0x3f70, 0x3f60, 0x3f50, 0x3f40, 0x3f28, 0x3f18, 
+    0x3f08, 0x3ef8, 0x3ee8, 0x3ed0, 0x3ec0, 0x3eb0, 0x3ea0, 0x3e88, 
+    0x3e78, 0x3e68, 0x3e58, 0x3e48, 0x3e30, 0x3e20, 0x3e10, 0x3e00, 
+    0x3df0, 0x3dd8, 0x3dc8, 0x3db8, 0x3da8, 0x3d98, 0x3d80, 0x3d70, 
+    0x3d60, 0x3d50, 0x3d40, 0x3d28, 0x3d18, 0x3d08, 0x3cf8, 0x3ce8, 
+    0x3cd8, 0x3cc0, 0x3cb0, 0x3ca0, 0x3c90, 0x3c80, 0x3c70, 0x3c58, 
+    0x3c48, 0x3c38, 0x3c28, 0x3c18, 0x3c08, 0x3bf0, 0x3be0, 0x3bd0, 
+    0x3bc0, 0x3bb0, 0x3ba0, 0x3b90, 0x3b78, 0x3b68, 0x3b58, 0x3b48, 
+    0x3b38, 0x3b28, 0x3b18, 0x3b00, 0x3af0, 0x3ae0, 0x3ad0, 0x3ac0, 
+    0x3ab0, 0x3aa0, 0x3a88, 0x3a78, 0x3a68, 0x3a58, 0x3a48, 0x3a38, 
+    0x3a28, 0x3a18, 0x3a08, 0x39f0, 0x39e0, 0x39d0, 0x39c0, 0x39b0, 
+    0x39a0, 0x3990, 0x3980, 0x3970, 0x3958, 0x3948, 0x3938, 0x3928, 
+    0x3918, 0x3908, 0x38f8, 0x38e8, 0x38d8, 0x38c8, 0x38b8, 0x38a8, 
+    0x3890, 0x3880, 0x3870, 0x3860, 0x3850, 0x3840, 0x3830, 0x3820, 
+    0x3810, 0x3800, 0x37f0, 0x37e0, 0x37d0, 0x37c0, 0x37a8, 0x3798, 
+    0x3788, 0x3778, 0x3768, 0x3758, 0x3748, 0x3738, 0x3728, 0x3718, 
+    0x3708, 0x36f8, 0x36e8, 0x36d8, 0x36c8, 0x36b8, 0x36a8, 0x3698, 
+    0x3688, 0x3678, 0x3668, 0x3658, 0x3648, 0x3630, 0x3620, 0x3610, 
+    0x3600, 0x35f0, 0x35e0, 0x35d0, 0x35c0, 0x35b0, 0x35a0, 0x3590, 
+    0x3580, 0x3570, 0x3560, 0x3550, 0x3540, 0x3530, 0x3520, 0x3510, 
+    0x3500, 0x34f0, 0x34e0, 0x34d0, 0x34c0, 0x34b0, 0x34a0, 0x3490, 
+    0x3480, 0x3470, 0x3460, 0x3450, 0x3440, 0x3430, 0x3420, 0x3410, 
+    0x3400, 0x33f0, 0x33e0, 0x33d0, 0x33c8, 0x33b8, 0x33a8, 0x3398, 
+    0x3388, 0x3378, 0x3368, 0x3358, 0x3348, 0x3338, 0x3328, 0x3318, 
+    0x3308, 0x32f8, 0x32e8, 0x32d8, 0x32c8, 0x32b8, 0x32a8, 0x3298, 
+    0x3288, 0x3278, 0x3268, 0x3260, 0x3250, 0x3240, 0x3230, 0x3220, 
+    0x3210, 0x3200, 0x31f0, 0x31e0, 0x31d0, 0x31c0, 0x31b0, 0x31a0, 
+    0x3190, 0x3180, 0x3178, 0x3168, 0x3158, 0x3148, 0x3138, 0x3128, 
+    0x3118, 0x3108, 0x30f8, 0x30e8, 0x30d8, 0x30c8, 0x30c0, 0x30b0, 
+    0x30a0, 0x3090, 0x3080, 0x3070, 0x3060, 0x3050, 0x3040, 0x3030, 
+    0x3028, 0x3018, 0x3008, 0x2ff8, 0x2fe8, 0x2fd8, 0x2fc8, 0x2fb8, 
+    0x2fa8, 0x2fa0, 0x2f90, 0x2f80, 0x2f70, 0x2f60, 0x2f50, 0x2f40, 
+    0x2f30, 0x2f28, 0x2f18, 0x2f08, 0x2ef8, 0x2ee8, 0x2ed8, 0x2ec8, 
+    0x2eb8, 0x2eb0, 0x2ea0, 0x2e90, 0x2e80, 0x2e70, 0x2e60, 0x2e50, 
+    0x2e48, 0x2e38, 0x2e28, 0x2e18, 0x2e08, 0x2df8, 0x2df0, 0x2de0, 
+    0x2dd0, 0x2dc0, 0x2db0, 0x2da0, 0x2d90, 0x2d88, 0x2d78, 0x2d68, 
+    0x2d58, 0x2d48, 0x2d38, 0x2d30, 0x2d20, 0x2d10, 0x2d00, 0x2cf0, 
+    0x2ce0, 0x2cd8, 0x2cc8, 0x2cb8, 0x2ca8, 0x2c98, 0x2c90, 0x2c80, 
+    0x2c70, 0x2c60, 0x2c50, 0x2c40, 0x2c38, 0x2c28, 0x2c18, 0x2c08, 
+    0x2bf8, 0x2bf0, 0x2be0, 0x2bd0, 0x2bc0, 0x2bb0, 0x2ba8, 0x2b98, 
+    0x2b88, 0x2b78, 0x2b68, 0x2b60, 0x2b50, 0x2b40, 0x2b30, 0x2b20, 
+    0x2b18, 0x2b08, 0x2af8, 0x2ae8, 0x2ae0, 0x2ad0, 0x2ac0, 0x2ab0, 
+    0x2aa0, 0x2a98, 0x2a88, 0x2a78, 0x2a68, 0x2a60, 0x2a50, 0x2a40, 
+    0x2a30, 0x2a20, 0x2a18, 0x2a08, 0x29f8, 0x29e8, 0x29e0, 0x29d0, 
+    0x29c0, 0x29b0, 0x29a8, 0x2998, 0x2988, 0x2978, 0x2970, 0x2960, 
+    0x2950, 0x2940, 0x2938, 0x2928, 0x2918, 0x2908, 0x2900, 0x28f0, 
+    0x28e0, 0x28d0, 0x28c8, 0x28b8, 0x28a8, 0x2898, 0x2890, 0x2880, 
+    0x2870, 0x2868, 0x2858, 0x2848, 0x2838, 0x2830, 0x2820, 0x2810, 
+    0x2800, 0x27f8, 0x27e8, 0x27d8, 0x27d0, 0x27c0, 0x27b0, 0x27a0, 
+    0x2798, 0x2788, 0x2778, 0x2770, 0x2760, 0x2750, 0x2740, 0x2738, 
+    0x2728, 0x2718, 0x2710, 0x2700, 0x26f0, 0x26e8, 0x26d8, 0x26c8, 
+    0x26b8, 0x26b0, 0x26a0, 0x2690, 0x2688, 0x2678, 0x2668, 0x2660, 
+    0x2650, 0x2640, 0x2638, 0x2628, 0x2618, 0x2608, 0x2600, 0x25f0, 
+    0x25e0, 0x25d8, 0x25c8, 0x25b8, 0x25b0, 0x25a0, 0x2590, 0x2588, 
+    0x2578, 0x2568, 0x2560, 0x2550, 0x2540, 0x2538, 0x2528, 0x2518, 
+    0x2510, 0x2500, 0x24f0, 0x24e8, 0x24d8, 0x24c8, 0x24c0, 0x24b0, 
+    0x24a0, 0x2498, 0x2488, 0x2478, 0x2470, 0x2460, 0x2450, 0x2448, 
+    0x2438, 0x2430, 0x2420, 0x2410, 0x2408, 0x23f8, 0x23e8, 0x23e0, 
+    0x23d0, 0x23c0, 0x23b8, 0x23a8, 0x23a0, 0x2390, 0x2380, 0x2378, 
+    0x2368, 0x2358, 0x2350, 0x2340, 0x2330, 0x2328, 0x2318, 0x2310, 
+    0x2300, 0x22f0, 0x22e8, 0x22d8, 0x22d0, 0x22c0, 0x22b0, 0x22a8, 
+    0x2298, 0x2288, 0x2280, 0x2270, 0x2268, 0x2258, 0x2248, 0x2240, 
+    0x2230, 0x2228, 0x2218, 0x2208, 0x2200, 0x21f0, 0x21e8, 0x21d8, 
+    0x21c8, 0x21c0, 0x21b0, 0x21a8, 0x2198, 0x2188, 0x2180, 0x2170, 
+    0x2168, 0x2158, 0x2148, 0x2140, 0x2130, 0x2128, 0x2118, 0x2108, 
+    0x2100, 0x20f0, 0x20e8, 0x20d8, 0x20d0, 0x20c0, 0x20b0, 0x20a8, 
+    0x2098, 0x2090, 0x2080, 0x2078, 0x2068, 0x2058, 0x2050, 0x2040, 
+    0x2038, 0x2028, 0x2020, 0x2010, 0x2000, 0x1ff8, 0x1fe8, 0x1fe0, 
+    0x1fd0, 0x1fc8, 0x1fb8, 0x1fb0, 0x1fa0, 0x1f90, 0x1f88, 0x1f78, 
+    0x1f70, 0x1f60, 0x1f58, 0x1f48, 0x1f40, 0x1f30, 0x1f20, 0x1f18, 
+    0x1f08, 0x1f00, 0x1ef0, 0x1ee8, 0x1ed8, 0x1ed0, 0x1ec0, 0x1eb8, 
+    0x1ea8, 0x1ea0, 0x1e90, 0x1e80, 0x1e78, 0x1e68, 0x1e60, 0x1e50, 
+    0x1e48, 0x1e38, 0x1e30, 0x1e20, 0x1e18, 0x1e08, 0x1e00, 0x1df0, 
+    0x1de8, 0x1dd8, 0x1dd0, 0x1dc0, 0x1db8, 0x1da8, 0x1da0, 0x1d90, 
+    0x1d80, 0x1d78, 0x1d68, 0x1d60, 0x1d50, 0x1d48, 0x1d38, 0x1d30, 
+    0x1d20, 0x1d18, 0x1d08, 0x1d00, 0x1cf0, 0x1ce8, 0x1cd8, 0x1cd0, 
+    0x1cc0, 0x1cb8, 0x1ca8, 0x1ca0, 0x1c90, 0x1c88, 0x1c78, 0x1c70, 
+    0x1c60, 0x1c58, 0x1c48, 0x1c40, 0x1c30, 0x1c28, 0x1c18, 0x1c10, 
+    0x1c00, 0x1bf8, 0x1bf0, 0x1be0, 0x1bd8, 0x1bc8, 0x1bc0, 0x1bb0, 
+    0x1ba8, 0x1b98, 0x1b90, 0x1b80, 0x1b78, 0x1b68, 0x1b60, 0x1b50, 
+    0x1b48, 0x1b38, 0x1b30, 0x1b20, 0x1b18, 0x1b08, 0x1b00, 0x1af8, 
+    0x1ae8, 0x1ae0, 0x1ad0, 0x1ac8, 0x1ab8, 0x1ab0, 0x1aa0, 0x1a98, 
+    0x1a88, 0x1a80, 0x1a70, 0x1a68, 0x1a60, 0x1a50, 0x1a48, 0x1a38, 
+    0x1a30, 0x1a20, 0x1a18, 0x1a08, 0x1a00, 0x19f8, 0x19e8, 0x19e0, 
+    0x19d0, 0x19c8, 0x19b8, 0x19b0, 0x19a0, 0x1998, 0x1990, 0x1980, 
+    0x1978, 0x1968, 0x1960, 0x1950, 0x1948, 0x1938, 0x1930, 0x1928, 
+    0x1918, 0x1910, 0x1900, 0x18f8, 0x18e8, 0x18e0, 0x18d8, 0x18c8, 
+    0x18c0, 0x18b0, 0x18a8, 0x1898, 0x1890, 0x1888, 0x1878, 0x1870, 
+    0x1860, 0x1858, 0x1850, 0x1840, 0x1838, 0x1828, 0x1820, 0x1810, 
+    0x1808, 0x1800, 0x17f0, 0x17e8, 0x17d8, 0x17d0, 0x17c8, 0x17b8, 
+    0x17b0, 0x17a0, 0x1798, 0x1790, 0x1780, 0x1778, 0x1768, 0x1760, 
+    0x1758, 0x1748, 0x1740, 0x1730, 0x1728, 0x1720, 0x1710, 0x1708, 
+    0x16f8, 0x16f0, 0x16e8, 0x16d8, 0x16d0, 0x16c8, 0x16b8, 0x16b0, 
+    0x16a0, 0x1698, 0x1690, 0x1680, 0x1678, 0x1668, 0x1660, 0x1658, 
+    0x1648, 0x1640, 0x1638, 0x1628, 0x1620, 0x1610, 0x1608, 0x1600, 
+    0x15f0, 0x15e8, 0x15e0, 0x15d0, 0x15c8, 0x15b8, 0x15b0, 0x15a8, 
+    0x1598, 0x1590, 0x1588, 0x1578, 0x1570, 0x1568, 0x1558, 0x1550, 
+    0x1540, 0x1538, 0x1530, 0x1520, 0x1518, 0x1510, 0x1500, 0x14f8, 
+    0x14f0, 0x14e0, 0x14d8, 0x14d0, 0x14c0, 0x14b8, 0x14a8, 0x14a0, 
+    0x1498, 0x1488, 0x1480, 0x1478, 0x1468, 0x1460, 0x1458, 0x1448, 
+    0x1440, 0x1438, 0x1428, 0x1420, 0x1418, 0x1408, 0x1400, 0x13f8, 
+    0x13e8, 0x13e0, 0x13d8, 0x13c8, 0x13c0, 0x13b8, 0x13a8, 0x13a0, 
+    0x1398, 0x1388, 0x1380, 0x1378, 0x1368, 0x1360, 0x1358, 0x1348, 
+    0x1340, 0x1338, 0x1328, 0x1320, 0x1318, 0x1308, 0x1300, 0x12f8, 
+    0x12e8, 0x12e0, 0x12d8, 0x12d0, 0x12c0, 0x12b8, 0x12b0, 0x12a0, 
+    0x1298, 0x1290, 0x1280, 0x1278, 0x1270, 0x1260, 0x1258, 0x1250, 
+    0x1240, 0x1238, 0x1230, 0x1228, 0x1218, 0x1210, 0x1208, 0x11f8, 
+    0x11f0, 0x11e8, 0x11d8, 0x11d0, 0x11c8, 0x11c0, 0x11b0, 0x11a8, 
+    0x11a0, 0x1190, 0x1188, 0x1180, 0x1178, 0x1168, 0x1160, 0x1158, 
+    0x1148, 0x1140, 0x1138, 0x1128, 0x1120, 0x1118, 0x1110, 0x1100, 
+    0x10f8, 0x10f0, 0x10e8, 0x10d8, 0x10d0, 0x10c8, 0x10b8, 0x10b0, 
+    0x10a8, 0x10a0, 0x1090, 0x1088, 0x1080, 0x1070, 0x1068, 0x1060, 
+    0x1058, 0x1048, 0x1040, 0x1038, 0x1030, 0x1020, 0x1018, 0x1010, 
+    0x1000, 0x0ff8, 0x0ff0, 0x0fe8, 0x0fd8, 0x0fd0, 0x0fc8, 0x0fc0, 
+    0x0fb0, 0x0fa8, 0x0fa0, 0x0f98, 0x0f88, 0x0f80, 0x0f78, 0x0f70, 
+    0x0f60, 0x0f58, 0x0f50, 0x0f48, 0x0f38, 0x0f30, 0x0f28, 0x0f20, 
+    0x0f10, 0x0f08, 0x0f00, 0x0ef8, 0x0ee8, 0x0ee0, 0x0ed8, 0x0ed0, 
+    0x0ec0, 0x0eb8, 0x0eb0, 0x0ea8, 0x0e98, 0x0e90, 0x0e88, 0x0e80, 
+    0x0e70, 0x0e68, 0x0e60, 0x0e58, 0x0e48, 0x0e40, 0x0e38, 0x0e30, 
+    0x0e28, 0x0e18, 0x0e10, 0x0e08, 0x0e00, 0x0df0, 0x0de8, 0x0de0, 
+    0x0dd8, 0x0dc8, 0x0dc0, 0x0db8, 0x0db0, 0x0da8, 0x0d98, 0x0d90, 
+    0x0d88, 0x0d80, 0x0d70, 0x0d68, 0x0d60, 0x0d58, 0x0d50, 0x0d40, 
+    0x0d38, 0x0d30, 0x0d28, 0x0d18, 0x0d10, 0x0d08, 0x0d00, 0x0cf8, 
+    0x0ce8, 0x0ce0, 0x0cd8, 0x0cd0, 0x0cc8, 0x0cb8, 0x0cb0, 0x0ca8, 
+    0x0ca0, 0x0c98, 0x0c88, 0x0c80, 0x0c78, 0x0c70, 0x0c60, 0x0c58, 
+    0x0c50, 0x0c48, 0x0c40, 0x0c30, 0x0c28, 0x0c20, 0x0c18, 0x0c10, 
+    0x0c00, 0x0bf8, 0x0bf0, 0x0be8, 0x0be0, 0x0bd8, 0x0bc8, 0x0bc0, 
+    0x0bb8, 0x0bb0, 0x0ba8, 0x0b98, 0x0b90, 0x0b88, 0x0b80, 0x0b78, 
+    0x0b68, 0x0b60, 0x0b58, 0x0b50, 0x0b48, 0x0b40, 0x0b30, 0x0b28, 
+    0x0b20, 0x0b18, 0x0b10, 0x0b00, 0x0af8, 0x0af0, 0x0ae8, 0x0ae0, 
+    0x0ad8, 0x0ac8, 0x0ac0, 0x0ab8, 0x0ab0, 0x0aa8, 0x0a98, 0x0a90, 
+    0x0a88, 0x0a80, 0x0a78, 0x0a70, 0x0a60, 0x0a58, 0x0a50, 0x0a48, 
+    0x0a40, 0x0a38, 0x0a28, 0x0a20, 0x0a18, 0x0a10, 0x0a08, 0x0a00, 
+    0x09f0, 0x09e8, 0x09e0, 0x09d8, 0x09d0, 0x09c8, 0x09c0, 0x09b0, 
+    0x09a8, 0x09a0, 0x0998, 0x0990, 0x0988, 0x0978, 0x0970, 0x0968, 
+    0x0960, 0x0958, 0x0950, 0x0948, 0x0938, 0x0930, 0x0928, 0x0920, 
+    0x0918, 0x0910, 0x0900, 0x08f8, 0x08f0, 0x08e8, 0x08e0, 0x08d8, 
+    0x08d0, 0x08c0, 0x08b8, 0x08b0, 0x08a8, 0x08a0, 0x0898, 0x0890, 
+    0x0880, 0x0878, 0x0870, 0x0868, 0x0860, 0x0858, 0x0850, 0x0848, 
+    0x0838, 0x0830, 0x0828, 0x0820, 0x0818, 0x0810, 0x0808, 0x0800, 
+    0x07f0, 0x07e8, 0x07e0, 0x07d8, 0x07d0, 0x07c8, 0x07c0, 0x07b0, 
+    0x07a8, 0x07a0, 0x0798, 0x0790, 0x0788, 0x0780, 0x0778, 0x0770, 
+    0x0760, 0x0758, 0x0750, 0x0748, 0x0740, 0x0738, 0x0730, 0x0728, 
+    0x0718, 0x0710, 0x0708, 0x0700, 0x06f8, 0x06f0, 0x06e8, 0x06e0, 
+    0x06d8, 0x06c8, 0x06c0, 0x06b8, 0x06b0, 0x06a8, 0x06a0, 0x0698, 
+    0x0690, 0x0688, 0x0680, 0x0670, 0x0668, 0x0660, 0x0658, 0x0650, 
+    0x0648, 0x0640, 0x0638, 0x0630, 0x0620, 0x0618, 0x0610, 0x0608, 
+    0x0600, 0x05f8, 0x05f0, 0x05e8, 0x05e0, 0x05d8, 0x05d0, 0x05c0, 
+    0x05b8, 0x05b0, 0x05a8, 0x05a0, 0x0598, 0x0590, 0x0588, 0x0580, 
+    0x0578, 0x0570, 0x0560, 0x0558, 0x0550, 0x0548, 0x0540, 0x0538, 
+    0x0530, 0x0528, 0x0520, 0x0518, 0x0510, 0x0508, 0x04f8, 0x04f0, 
+    0x04e8, 0x04e0, 0x04d8, 0x04d0, 0x04c8, 0x04c0, 0x04b8, 0x04b0, 
+    0x04a8, 0x04a0, 0x0498, 0x0488, 0x0480, 0x0478, 0x0470, 0x0468, 
+    0x0460, 0x0458, 0x0450, 0x0448, 0x0440, 0x0438, 0x0430, 0x0428, 
+    0x0420, 0x0418, 0x0408, 0x0400, 0x03f8, 0x03f0, 0x03e8, 0x03e0, 
+    0x03d8, 0x03d0, 0x03c8, 0x03c0, 0x03b8, 0x03b0, 0x03a8, 0x03a0, 
+    0x0398, 0x0390, 0x0388, 0x0378, 0x0370, 0x0368, 0x0360, 0x0358, 
+    0x0350, 0x0348, 0x0340, 0x0338, 0x0330, 0x0328, 0x0320, 0x0318, 
+    0x0310, 0x0308, 0x0300, 0x02f8, 0x02f0, 0x02e8, 0x02d8, 0x02d0, 
+    0x02c8, 0x02c0, 0x02b8, 0x02b0, 0x02a8, 0x02a0, 0x0298, 0x0290, 
+    0x0288, 0x0280, 0x0278, 0x0270, 0x0268, 0x0260, 0x0258, 0x0250, 
+    0x0248, 0x0240, 0x0238, 0x0230, 0x0228, 0x0220, 0x0218, 0x0210, 
+    0x0200, 0x01f8, 0x01f0, 0x01e8, 0x01e0, 0x01d8, 0x01d0, 0x01c8, 
+    0x01c0, 0x01b8, 0x01b0, 0x01a8, 0x01a0, 0x0198, 0x0190, 0x0188, 
+    0x0180, 0x0178, 0x0170, 0x0168, 0x0160, 0x0158, 0x0150, 0x0148, 
+    0x0140, 0x0138, 0x0130, 0x0128, 0x0120, 0x0118, 0x0110, 0x0108, 
+    0x0100, 0x00f8, 0x00f0, 0x00e8, 0x00e0, 0x00d8, 0x00d0, 0x00c8, 
+    0x00c0, 0x00b8, 0x00b0, 0x00a8, 0x00a0, 0x0098, 0x0090, 0x0088, 
+    0x0080, 0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 
+    0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008
+};
+
+// Bit pattern 0x7f800000: sign bit clear, exponent field all ones, fraction
+// zero - i.e. +infinity in IEEE-754 single precision (assuming Float32 holds
+// the raw 32-bit encoding - TODO confirm against the softfloat typedef).
+// NOTE(review): not referenced by the code visible in this part of the file.
+static const Float32 inf = 0x7f800000;
+
+/*
+ * Table-driven approximation of 1/op for one single precision value.
+ *
+ *  op      - raw single precision operand
+ *  returns - raw single precision approximation of the reciprocal:
+ *              zero or denormal -> infinity carrying the operand sign
+ *              infinity         -> zero carrying the operand sign
+ *              NaN              -> convert_to_QNaN(op)
+ *              anything else    -> rcp_table lookup, or a signed zero when
+ *                                  the result exponent would drop to 0 or
+ *                                  below
+ */
+static Float32 approximate_reciprocal(Float32 op)
+{
+  const float_class_t kind = float32_class(op);
+  const int negative = float32_sign(op);
+
+  if (kind == float_NaN)
+    return convert_to_QNaN(op);
+
+  if (kind == float_negative_inf || kind == float_positive_inf)
+    return packFloat32(negative, 0x00, 0);
+
+  /* denormal operands get the same answer as zeros: signed infinity */
+  if (kind == float_negative_zero || kind == float_positive_zero ||
+      kind == float_denormal)
+    return packFloat32(negative, 0xFF, 0);
+
+  /*
+   * Remaining operands have the form 1.f * 2^(e-127), so the reciprocal
+   *  is (2/1.f) * 2^(126-e) and the biased result exponent is 253-e.
+   *  A non-positive exponent cannot be encoded as a normal number; such
+   *  results are returned as signed zero.
+   */
+  const Bit16s op_exp = float32_exp(op);
+  const Bit16s rcp_exp = 253 - op_exp;
+  if (rcp_exp <= 0)
+    return packFloat32(negative, 0x00, 0);
+
+  /*
+   * The 2048-entry precalculated table is addressed by the leading
+   *  fraction bits (fraction >> 12) and stands for 1/1.yyyyyyyyyyy1,
+   *  rounded to the 12th bit after the binary point by round-to-nearest
+   *  regardless of the current rounding mode. Shifting the entry left
+   *  by 8 positions it inside the result fraction field.
+   */
+  const Bit32u op_fraction = float32_fraction(op);
+  const Bit32u rcp_fraction = (Bit32u)(rcp_table[op_fraction >> 12]) << 8;
+
+  return packFloat32(negative, rcp_exp, rcp_fraction);
+}
+
+#endif
+
+/*
+ * RCPPS (opcode 0F 53)
+ * Takes the four packed single precision FP values of XMM2/MEM, replaces
+ *  each one by its approximate reciprocal and writes the result to the
+ *  XMM register selected by nnn().
+ * Possible floating point exceptions: -
+ */
+void BX_CPU_C::RCPPS_VpsWps(bxInstruction_c *i)
+{
+#if BX_SUPPORT_SSE >= 1
+  BX_CPU_THIS_PTR prepareSSE();
+
+  BxPackedXmmRegister src;
+
+  if (! i->modC0()) {
+    /* memory form: read the double quadword at segment:offset */
+    readVirtualDQwordAligned(i->seg(), RMAddr(i), (Bit8u *) &src);
+  }
+  else {
+    /* register form */
+    src = BX_READ_XMM_REG(i->rm());
+  }
+
+  /* each 32-bit element is converted independently, in place */
+  for (unsigned lane = 0; lane < 4; lane++) {
+    src.xmm32u(lane) = approximate_reciprocal(src.xmm32u(lane));
+  }
+
+  BX_WRITE_XMM_REG(i->nnn(), src);
+
+#else
+  BX_INFO(("RCPPS_VpsWps: required SSE, use --enable-sse option"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/*
+ * RCPSS (opcode F3 0F 53)
+ * Takes the scalar single precision FP value of XMM2/MEM32 and writes its
+ *  approximate reciprocal to the low dword of the XMM register selected
+ *  by nnn().
+ * Possible floating point exceptions: -
+ */
+void BX_CPU_C::RCPSS_VssWss(bxInstruction_c *i)
+{
+#if BX_SUPPORT_SSE >= 1
+  BX_CPU_THIS_PTR prepareSSE();
+
+  Float32 src;
+
+  if (! i->modC0()) {
+    /* memory form: read one dword at segment:offset */
+    read_virtual_dword(i->seg(), RMAddr(i), &src);
+  }
+  else {
+    /* register form: only the low dword of the source takes part */
+    src = BX_READ_XMM_REG_LO_DWORD(i->rm());
+  }
+
+  Float32 rcp = approximate_reciprocal(src);
+  BX_WRITE_XMM_REG_LO_DWORD(i->nnn(), rcp);
+
+#else
+  BX_INFO(("RCPSS_VssWss: required SSE, use --enable-sse option"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/*
+ * Handler for RSQRTPS. The instruction is not implemented yet: with SSE
+ *  support compiled in it only calls prepareSSE() and then reports the
+ *  missing implementation through BX_PANIC; without SSE support it logs
+ *  a hint through BX_INFO and calls UndefinedOpcode().
+ */
+void BX_CPU_C::RSQRTPS_VpsWps(bxInstruction_c *i)
+{
+#if BX_SUPPORT_SSE >= 1
+  BX_CPU_THIS_PTR prepareSSE();
+
+  BX_PANIC(("RSQRTPS_VpsWps: SSE instruction still not implemented"));
+#else
+  BX_INFO(("RSQRTPS_VpsWps: required SSE, use --enable-sse option"));
+  UndefinedOpcode(i);
+#endif
+}
+
+/*
+ * Handler for RSQRTSS. The instruction is not implemented yet: with SSE
+ *  support compiled in it only calls prepareSSE() and then reports the
+ *  missing implementation through BX_PANIC; without SSE support it logs
+ *  a hint through BX_INFO and calls UndefinedOpcode().
+ */
+void BX_CPU_C::RSQRTSS_VssWss(bxInstruction_c *i)
+{
+#if BX_SUPPORT_SSE >= 1
+  BX_CPU_THIS_PTR prepareSSE();
+
+  BX_PANIC(("RSQRTSS_VssWss: SSE instruction still not implemented"));
+#else
+  BX_INFO(("RSQRTSS_VssWss: required SSE, use --enable-sse option"));
+  UndefinedOpcode(i);
+#endif
+}