Sparc: split FPU and VIS op helpers

Move FPU op helpers to fop_helper.c. Move VIS op helpers to vis_helper.c, compile it only for Sparc64. Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Blue Swirl <blauwirbel@gmail.com>
2011-08-01 07:37:45 +00:00 · 2011-08-01 07:37:45 +00:00 · 1bccec25e1
commit 1bccec25e1
parent 99ca02195c
4 changed files with 800 additions and 745 deletions
--- a/Makefile.target
+++ b/Makefile.target
@ -74,10 +74,11 @@ libobj-y += op_helper.o helper.o
 ifeq ($(TARGET_BASE_ARCH), i386)
 libobj-y += cpuid.o
 endif
 libobj-$(TARGET_SPARC64) += vis_helper.o
 libobj-$(CONFIG_NEED_MMU) += mmu.o
 libobj-$(TARGET_ARM) += neon_helper.o iwmmxt_helper.o
 ifeq ($(TARGET_BASE_ARCH), sparc)
-libobj-y += cpu_init.o
+libobj-y += fop_helper.o cpu_init.o
 endif
 libobj-$(TARGET_SPARC) += int32_helper.o
 libobj-$(TARGET_SPARC64) += int64_helper.o
@ -96,7 +97,7 @@ tcg/tcg.o: cpu.h
 # HELPER_CFLAGS is used for all the code compiled with static register
 # variables
-op_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
+op_helper.o fop_helper.o vis_helper.o user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
 # Note: this is a workaround. The real fix is to avoid compiling
 # cpu_signal_handler() in user-exec.c.
--- a/target-sparc/fop_helper.c
+++ b/target-sparc/fop_helper.c
@ -0,0 +1,394 @@
 /*
 * FPU op helpers
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "cpu.h"
 #include "dyngen-exec.h"
 #include "helper.h"
 /*
 * Shorthands for the temporaries stored in env.  In this file dt0/dt1 are
 * used as float64 operands/results and qt0/qt1 as float128 ones.
 */
 #define DT0 (env->dt0)
 #define DT1 (env->dt1)
 #define QT0 (env->qt0)
 #define QT1 (env->qt1)
 /* Opens the definition of the argument-less helper helper_f<name><p>(). */
 #define F_HELPER(name, p) void helper_f##name##p(void)
 /*
 * F_BINOP(name) generates the three width variants of a binary FP operation:
 *   helper_f<name>s - float32 operands passed by value, result returned;
 *   helper_f<name>d - DT0 = float64_<name>(DT0, DT1);
 *   helper_f<name>q - QT0 = float128_<name>(QT0, QT1).
 * Every variant passes &env->fp_status to the float*_<name> routine.
 */
 #define F_BINOP(name)                                           \
    float32 helper_f ## name ## s (float32 src1, float32 src2)  \
    {                                                           \
        return float32_ ## name (src1, src2, &env->fp_status);  \
    }                                                           \
    F_HELPER(name, d)                                           \
    {                                                           \
        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
    }                                                           \
    F_HELPER(name, q)                                           \
    {                                                           \
        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
    }
 /* Instantiate add, sub, mul and div in all three widths. */
 F_BINOP(add);
 F_BINOP(sub);
 F_BINOP(mul);
 F_BINOP(div);
 #undef F_BINOP
 /*
 * Widen both float32 sources to float64 and store their float64 product in
 * DT0.  All three softfloat calls use &env->fp_status.
 */
 void helper_fsmuld(float32 src1, float32 src2)
 {
    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
                      float32_to_float64(src2, &env->fp_status),
                      &env->fp_status);
 }
 /*
 * Widen DT0 and DT1 to float128 and store their float128 product in QT0.
 * All three softfloat calls use &env->fp_status.
 */
 void helper_fdmulq(void)
 {
    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
                       float64_to_float128(DT1, &env->fp_status),
                       &env->fp_status);
 }
 /* Single-precision negate: returns the result of float32_chs(src). */
 float32 helper_fnegs(float32 src)
 {
    float32 negated = float32_chs(src);
    return negated;
 }
 #ifdef TARGET_SPARC64
 /* helper_fnegd(): DT0 = float64_chs(DT1).  Built only for TARGET_SPARC64. */
 F_HELPER(neg, d)
 {
    DT0 = float64_chs(DT1);
 }
 /* helper_fnegq(): QT0 = float128_chs(QT1).  Built only for TARGET_SPARC64. */
 F_HELPER(neg, q)
 {
    QT0 = float128_chs(QT1);
 }
 #endif
 /* Integer to float conversion.  */
 /* Convert a 32-bit integer to float32 using env->fp_status; returns the result. */
 float32 helper_fitos(int32_t src)
 {
    float32 converted = int32_to_float32(src, &env->fp_status);
    return converted;
 }
 /* Convert a 32-bit integer to float64 and store it in DT0. */
 void helper_fitod(int32_t src)
 {
    DT0 = int32_to_float64(src, &env->fp_status);
 }
 /* Convert a 32-bit integer to float128 and store it in QT0. */
 void helper_fitoq(int32_t src)
 {
    QT0 = int32_to_float128(src, &env->fp_status);
 }
 #ifdef TARGET_SPARC64
 /*
 * 64-bit integer to float conversions.  The integer operand is the storage
 * of DT1 read through an int64_t pointer (the bits are reinterpreted, not
 * converted).
 */
 /* Returns the float32 conversion of the int64_t held in DT1. */
 float32 helper_fxtos(void)
 {
    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
 }
 /* helper_fxtod(): DT0 = float64 conversion of the int64_t held in DT1. */
 F_HELPER(xto, d)
 {
    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
 }
 /* helper_fxtoq(): QT0 = float128 conversion of the int64_t held in DT1. */
 F_HELPER(xto, q)
 {
    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
 }
 #endif
 #undef F_HELPER
 /* floating point conversion */
 /* Returns DT1 narrowed from float64 to float32. */
 float32 helper_fdtos(void)
 {
    return float64_to_float32(DT1, &env->fp_status);
 }
 /* DT0 = src widened from float32 to float64. */
 void helper_fstod(float32 src)
 {
    DT0 = float32_to_float64(src, &env->fp_status);
 }
 /* Returns QT1 narrowed from float128 to float32. */
 float32 helper_fqtos(void)
 {
    return float128_to_float32(QT1, &env->fp_status);
 }
 /* QT0 = src widened from float32 to float128. */
 void helper_fstoq(float32 src)
 {
    QT0 = float32_to_float128(src, &env->fp_status);
 }
 /* DT0 = QT1 narrowed from float128 to float64. */
 void helper_fqtod(void)
 {
    DT0 = float128_to_float64(QT1, &env->fp_status);
 }
 /* QT0 = DT1 widened from float64 to float128. */
 void helper_fdtoq(void)
 {
    QT0 = float64_to_float128(DT1, &env->fp_status);
 }
 /* Float to integer conversion.  */
 /*
 * All three helpers call the *_round_to_zero softfloat variants, so they do
 * not go through the generic float*_to_int32 entry points.
 */
 /* Returns src (float32) converted to int32_t. */
 int32_t helper_fstoi(float32 src)
 {
    return float32_to_int32_round_to_zero(src, &env->fp_status);
 }
 /* Returns DT1 (float64) converted to int32_t. */
 int32_t helper_fdtoi(void)
 {
    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
 }
 /* Returns QT1 (float128) converted to int32_t. */
 int32_t helper_fqtoi(void)
 {
    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
 }
 #ifdef TARGET_SPARC64
 /*
 * Float to 64-bit integer conversions (round-to-zero variants).  The int64_t
 * result is written into the storage of DT0 through an int64_t pointer, i.e.
 * DT0 ends up holding integer bits rather than a float64 value.
 */
 /* DT0 storage = src (float32) converted to int64_t. */
 void helper_fstox(float32 src)
 {
    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
 }
 /* DT0 storage = DT1 (float64) converted to int64_t. */
 void helper_fdtox(void)
 {
    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
 }
 /* DT0 storage = QT1 (float128) converted to int64_t. */
 void helper_fqtox(void)
 {
    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
 }
 #endif
 /* Single-precision absolute value: returns the result of float32_abs(src). */
 float32 helper_fabss(float32 src)
 {
    float32 magnitude = float32_abs(src);
    return magnitude;
 }
 #ifdef TARGET_SPARC64
 /* DT0 = float64_abs(DT1).  Built only for TARGET_SPARC64. */
 void helper_fabsd(void)
 {
    DT0 = float64_abs(DT1);
 }
 /* QT0 = float128_abs(QT1).  Built only for TARGET_SPARC64. */
 void helper_fabsq(void)
 {
    QT0 = float128_abs(QT1);
 }
 #endif
 /* Single-precision square root of src, computed with env->fp_status. */
 float32 helper_fsqrts(float32 src)
 {
    float32 root = float32_sqrt(src, &env->fp_status);
    return root;
 }
 /* DT0 = float64 square root of DT1. */
 void helper_fsqrtd(void)
 {
    DT0 = float64_sqrt(DT1, &env->fp_status);
 }
 /* QT0 = float128 square root of QT1. */
 void helper_fsqrtq(void)
 {
    QT0 = float128_sqrt(QT1, &env->fp_status);
 }
 /*
 * GEN_FCMP(name, size, reg1, reg2, FS, E) defines helper_<name>(void), which
 * compares reg1 with reg2 using <size>_compare and records the outcome in the
 * two condition-code bits (FSR_FCC1 | FSR_FCC0) << FS of env->fsr:
 *   less      -> FSR_FCC0
 *   greater   -> FSR_FCC1
 *   unordered -> FSR_FCC1 | FSR_FCC0, and FSR_NVA is set as well
 *   otherwise -> both bits cleared
 * env->fsr is first masked with FSR_FTT_NMASK.  When E is non-zero, either
 * operand is a NaN and FSR_NVM is set, FSR_NVC and FSR_FTT_IEEE_EXCP are set
 * and TT_FP_EXCP is raised before the comparison is attempted.
 * NOTE(review): the unordered branch also raises TT_FP_EXCP whenever FSR_NVM
 * is set, without looking at E - confirm this is intended for E == 0.
 */
 #define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
    void glue(helper_, name) (void)                                     \
    {                                                                   \
        env->fsr &= FSR_FTT_NMASK;                                      \
        if (E && (glue(size, _is_any_nan)(reg1) ||                      \
                  glue(size, _is_any_nan)(reg2)) &&                     \
            (env->fsr & FSR_NVM)) {                                     \
            env->fsr |= FSR_NVC;                                        \
            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
            helper_raise_exception(env, TT_FP_EXCP);                    \
        }                                                               \
        switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) {   \
        case float_relation_unordered:                                  \
            if ((env->fsr & FSR_NVM)) {                                 \
                env->fsr |= FSR_NVC;                                    \
                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
                helper_raise_exception(env, TT_FP_EXCP);                \
            } else {                                                    \
                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
                env->fsr |= FSR_NVA;                                    \
            }                                                           \
            break;                                                      \
        case float_relation_less:                                       \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC0 << FS;                                 \
            break;                                                      \
        case float_relation_greater:                                    \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC1 << FS;                                 \
            break;                                                      \
        default:                                                        \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            break;                                                      \
        }                                                               \
    }
 /*
 * GEN_FCMPS(name, size, FS, E) defines helper_<name>(float32 src1,
 * float32 src2).  The body has the same shape as the register-based compare
 * generator in this file, but the operands are passed by value: the result
 * of <size>_compare(src1, src2) is recorded in the condition-code bits
 * (FSR_FCC1 | FSR_FCC0) << FS of env->fsr:
 *   less      -> FSR_FCC0
 *   greater   -> FSR_FCC1
 *   unordered -> FSR_FCC1 | FSR_FCC0, and FSR_NVA is set as well
 *   otherwise -> both bits cleared
 * With E non-zero, a NaN operand and FSR_NVM set, TT_FP_EXCP is raised before
 * the comparison.
 * NOTE(review): the unordered branch also raises TT_FP_EXCP whenever FSR_NVM
 * is set, without looking at E - confirm this is intended for E == 0.
 */
 #define GEN_FCMPS(name, size, FS, E)                                    \
    void glue(helper_, name)(float32 src1, float32 src2)                \
    {                                                                   \
        env->fsr &= FSR_FTT_NMASK;                                      \
        if (E && (glue(size, _is_any_nan)(src1) ||                      \
                  glue(size, _is_any_nan)(src2)) &&                     \
            (env->fsr & FSR_NVM)) {                                     \
            env->fsr |= FSR_NVC;                                        \
            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
            helper_raise_exception(env, TT_FP_EXCP);                    \
        }                                                               \
        switch (glue(size, _compare) (src1, src2, &env->fp_status)) {   \
        case float_relation_unordered:                                  \
            if ((env->fsr & FSR_NVM)) {                                 \
                env->fsr |= FSR_NVC;                                    \
                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
                helper_raise_exception(env, TT_FP_EXCP);                \
            } else {                                                    \
                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
                env->fsr |= FSR_NVA;                                    \
            }                                                           \
            break;                                                      \
        case float_relation_less:                                       \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC0 << FS;                                 \
            break;                                                      \
        case float_relation_greater:                                    \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC1 << FS;                                 \
            break;                                                      \
        default:                                                        \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            break;                                                      \
        }                                                               \
    }
 /*
 * Instantiate the compare helpers.  The FS argument (0, 22, 24 or 26) is the
 * left shift applied to FSR_FCC0/FSR_FCC1, so it selects which pair of
 * env->fsr bits receives the result; the last argument is E (1 for the
 * fcmpe* variants, 0 otherwise).
 */
 GEN_FCMPS(fcmps, float32, 0, 0);
 GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
 GEN_FCMPS(fcmpes, float32, 0, 1);
 GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
 GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
 GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
 #ifdef TARGET_SPARC64
 /* Variants writing the shifted condition-code fields; TARGET_SPARC64 only. */
 GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
 GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
 GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
 GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
 GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
 GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
 GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
 GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
 GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
 GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
 GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
 GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
 GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
 GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
 GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
 GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
 GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
 GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
 #endif
 /* Neither generator is used past this point; undefine both, not just one. */
 #undef GEN_FCMPS
 #undef GEN_FCMP
 /*
 * Fold the exception flags accumulated in env->fp_status into env->fsr.
 * If any resulting FSR_CEXC_MASK bit is also enabled in FSR_TEM_MASK (shifted
 * down by 23), FSR_FTT_IEEE_EXCP is set and TT_FP_EXCP is raised; otherwise
 * the FSR_CEXC_MASK bits are OR-ed into the field 5 bits above them.
 * Nothing is touched when no flag is pending.
 */
 void helper_check_ieee_exceptions(void)
 {
    target_ulong pending = get_float_exception_flags(&env->fp_status);
    if (!pending) {
        return;
    }
    /* Mirror each pending IEEE 754 flag into its FSR bit. */
    if (pending & float_flag_invalid) {
        env->fsr |= FSR_NVC;
    }
    if (pending & float_flag_overflow) {
        env->fsr |= FSR_OFC;
    }
    if (pending & float_flag_underflow) {
        env->fsr |= FSR_UFC;
    }
    if (pending & float_flag_divbyzero) {
        env->fsr |= FSR_DZC;
    }
    if (pending & float_flag_inexact) {
        env->fsr |= FSR_NXC;
    }
    if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
        /* At least one current exception is unmasked: trap. */
        env->fsr |= FSR_FTT_IEEE_EXCP;
        helper_raise_exception(env, TT_FP_EXCP);
    } else {
        /* All masked: accumulate the current exceptions instead. */
        env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
    }
 }
 /* Reset the exception flags held in env->fp_status to 0. */
 void helper_clear_float_exceptions(void)
 {
    set_float_exception_flags(0, &env->fp_status);
 }
 /*
 * Propagate the rounding-direction field of env->fsr (FSR_RD_MASK) to the
 * softfloat status in env->fp_status.  FSR_RD_ZERO and any value not listed
 * below select round-to-zero.
 */
 static inline void set_fsr(void)
 {
    int mode = float_round_to_zero;
    switch (env->fsr & FSR_RD_MASK) {
    case FSR_RD_NEAREST:
        mode = float_round_nearest_even;
        break;
    case FSR_RD_POS:
        mode = float_round_up;
        break;
    case FSR_RD_NEG:
        mode = float_round_down;
        break;
    default:
        /* FSR_RD_ZERO or unknown: keep round-to-zero. */
        break;
    }
    set_float_rounding_mode(mode, &env->fp_status);
 }
 /*
 * Load a 32-bit value into env->fsr: bits selected by FSR_LDFSR_MASK come
 * from new_fsr, bits selected by FSR_LDFSR_OLDMASK keep their current value.
 * set_fsr() is then called to refresh the softfloat rounding mode.
 */
 void helper_ldfsr(uint32_t new_fsr)
 {
    env->fsr = (new_fsr & FSR_LDFSR_MASK) | (env->fsr & FSR_LDFSR_OLDMASK);
    set_fsr();
 }
 #ifdef TARGET_SPARC64
 /*
 * 64-bit counterpart of helper_ldfsr (TARGET_SPARC64 only): bits selected by
 * FSR_LDXFSR_MASK come from new_fsr, bits selected by FSR_LDXFSR_OLDMASK are
 * preserved, then set_fsr() refreshes the softfloat rounding mode.
 */
 void helper_ldxfsr(uint64_t new_fsr)
 {
    env->fsr = (new_fsr & FSR_LDXFSR_MASK) | (env->fsr & FSR_LDXFSR_OLDMASK);
    set_fsr();
 }
 #endif
--- a/target-sparc/op_helper.c
+++ b/target-sparc/op_helper.c
@ -333,655 +333,6 @@ void helper_check_align(target_ulong addr, uint32_t align)
    }
 }
 /* Opens the definition of the argument-less helper helper_f<name><p>(). */
 #define F_HELPER(name, p) void helper_f##name##p(void)
 /*
 * F_BINOP(name) generates the three width variants of a binary FP operation:
 *   helper_f<name>s - float32 operands passed by value, result returned;
 *   helper_f<name>d - DT0 = float64_<name>(DT0, DT1);
 *   helper_f<name>q - QT0 = float128_<name>(QT0, QT1).
 * Every variant passes &env->fp_status to the float*_<name> routine.
 */
 #define F_BINOP(name)                                           \
    float32 helper_f ## name ## s (float32 src1, float32 src2)  \
    {                                                           \
        return float32_ ## name (src1, src2, &env->fp_status);  \
    }                                                           \
    F_HELPER(name, d)                                           \
    {                                                           \
        DT0 = float64_ ## name (DT0, DT1, &env->fp_status);     \
    }                                                           \
    F_HELPER(name, q)                                           \
    {                                                           \
        QT0 = float128_ ## name (QT0, QT1, &env->fp_status);    \
    }
 /* Instantiate add, sub, mul and div in all three widths. */
 F_BINOP(add);
 F_BINOP(sub);
 F_BINOP(mul);
 F_BINOP(div);
 #undef F_BINOP
 /*
 * Widen both float32 sources to float64 and store their float64 product in
 * DT0.  All three softfloat calls use &env->fp_status.
 */
 void helper_fsmuld(float32 src1, float32 src2)
 {
    DT0 = float64_mul(float32_to_float64(src1, &env->fp_status),
                      float32_to_float64(src2, &env->fp_status),
                      &env->fp_status);
 }
 /*
 * Widen DT0 and DT1 to float128 and store their float128 product in QT0.
 * All three softfloat calls use &env->fp_status.
 */
 void helper_fdmulq(void)
 {
    QT0 = float128_mul(float64_to_float128(DT0, &env->fp_status),
                       float64_to_float128(DT1, &env->fp_status),
                       &env->fp_status);
 }
 /* Single-precision negate: returns the result of float32_chs(src). */
 float32 helper_fnegs(float32 src)
 {
    float32 negated = float32_chs(src);
    return negated;
 }
 #ifdef TARGET_SPARC64
 /* helper_fnegd(): DT0 = float64_chs(DT1).  Built only for TARGET_SPARC64. */
 F_HELPER(neg, d)
 {
    DT0 = float64_chs(DT1);
 }
 /* helper_fnegq(): QT0 = float128_chs(QT1).  Built only for TARGET_SPARC64. */
 F_HELPER(neg, q)
 {
    QT0 = float128_chs(QT1);
 }
 #endif
 /* Integer to float conversion.  */
 /* Convert a 32-bit integer to float32 using env->fp_status; returns the result. */
 float32 helper_fitos(int32_t src)
 {
    float32 converted = int32_to_float32(src, &env->fp_status);
    return converted;
 }
 /* Convert a 32-bit integer to float64 and store it in DT0. */
 void helper_fitod(int32_t src)
 {
    DT0 = int32_to_float64(src, &env->fp_status);
 }
 /* Convert a 32-bit integer to float128 and store it in QT0. */
 void helper_fitoq(int32_t src)
 {
    QT0 = int32_to_float128(src, &env->fp_status);
 }
 #ifdef TARGET_SPARC64
 /*
 * 64-bit integer to float conversions.  The integer operand is the storage
 * of DT1 read through an int64_t pointer (the bits are reinterpreted, not
 * converted).
 */
 /* Returns the float32 conversion of the int64_t held in DT1. */
 float32 helper_fxtos(void)
 {
    return int64_to_float32(*((int64_t *)&DT1), &env->fp_status);
 }
 /* helper_fxtod(): DT0 = float64 conversion of the int64_t held in DT1. */
 F_HELPER(xto, d)
 {
    DT0 = int64_to_float64(*((int64_t *)&DT1), &env->fp_status);
 }
 /* helper_fxtoq(): QT0 = float128 conversion of the int64_t held in DT1. */
 F_HELPER(xto, q)
 {
    QT0 = int64_to_float128(*((int64_t *)&DT1), &env->fp_status);
 }
 #endif
 #undef F_HELPER
 /* floating point conversion */
 /* Returns DT1 narrowed from float64 to float32. */
 float32 helper_fdtos(void)
 {
    return float64_to_float32(DT1, &env->fp_status);
 }
 /* DT0 = src widened from float32 to float64. */
 void helper_fstod(float32 src)
 {
    DT0 = float32_to_float64(src, &env->fp_status);
 }
 /* Returns QT1 narrowed from float128 to float32. */
 float32 helper_fqtos(void)
 {
    return float128_to_float32(QT1, &env->fp_status);
 }
 /* QT0 = src widened from float32 to float128. */
 void helper_fstoq(float32 src)
 {
    QT0 = float32_to_float128(src, &env->fp_status);
 }
 /* DT0 = QT1 narrowed from float128 to float64. */
 void helper_fqtod(void)
 {
    DT0 = float128_to_float64(QT1, &env->fp_status);
 }
 /* QT0 = DT1 widened from float64 to float128. */
 void helper_fdtoq(void)
 {
    QT0 = float64_to_float128(DT1, &env->fp_status);
 }
 /* Float to integer conversion.  */
 /*
 * All three helpers call the *_round_to_zero softfloat variants, so they do
 * not go through the generic float*_to_int32 entry points.
 */
 /* Returns src (float32) converted to int32_t. */
 int32_t helper_fstoi(float32 src)
 {
    return float32_to_int32_round_to_zero(src, &env->fp_status);
 }
 /* Returns DT1 (float64) converted to int32_t. */
 int32_t helper_fdtoi(void)
 {
    return float64_to_int32_round_to_zero(DT1, &env->fp_status);
 }
 /* Returns QT1 (float128) converted to int32_t. */
 int32_t helper_fqtoi(void)
 {
    return float128_to_int32_round_to_zero(QT1, &env->fp_status);
 }
 #ifdef TARGET_SPARC64
 /*
 * Float to 64-bit integer conversions (round-to-zero variants).  The int64_t
 * result is written into the storage of DT0 through an int64_t pointer, i.e.
 * DT0 ends up holding integer bits rather than a float64 value.
 */
 /* DT0 storage = src (float32) converted to int64_t. */
 void helper_fstox(float32 src)
 {
    *((int64_t *)&DT0) = float32_to_int64_round_to_zero(src, &env->fp_status);
 }
 /* DT0 storage = DT1 (float64) converted to int64_t. */
 void helper_fdtox(void)
 {
    *((int64_t *)&DT0) = float64_to_int64_round_to_zero(DT1, &env->fp_status);
 }
 /* DT0 storage = QT1 (float128) converted to int64_t. */
 void helper_fqtox(void)
 {
    *((int64_t *)&DT0) = float128_to_int64_round_to_zero(QT1, &env->fp_status);
 }
 void helper_faligndata(void)
 {
    uint64_t tmp;
    tmp = (*((uint64_t *)&DT0)) << ((env->gsr & 7) * 8);
    /* on many architectures a shift of 64 does nothing */
    if ((env->gsr & 7) != 0) {
        tmp |= (*((uint64_t *)&DT1)) >> (64 - (env->gsr & 7) * 8);
    }
    *((uint64_t *)&DT0) = tmp;
 }
 /*
 * Element accessors for the VIS64/VIS32 unions.  When HOST_WORDS_BIGENDIAN
 * is defined the index is mirrored (last element minus n); otherwise it is
 * used as is.  Presumably this makes element n name the same part of the
 * value on either host byte order - TODO confirm against the callers.
 */
 #ifdef HOST_WORDS_BIGENDIAN
 #define VIS_B64(n) b[7 - (n)]
 #define VIS_W64(n) w[3 - (n)]
 #define VIS_SW64(n) sw[3 - (n)]
 #define VIS_L64(n) l[1 - (n)]
 #define VIS_B32(n) b[3 - (n)]
 #define VIS_W32(n) w[1 - (n)]
 #else
 #define VIS_B64(n) b[n]
 #define VIS_W64(n) w[n]
 #define VIS_SW64(n) sw[n]
 #define VIS_L64(n) l[n]
 #define VIS_B32(n) b[n]
 #define VIS_W32(n) w[n]
 #endif
 /*
 * Overlapping views of one 64-bit value: eight bytes, four unsigned or
 * signed 16-bit elements, two 32-bit elements, one uint64_t, or a float64.
 */
 typedef union {
    uint8_t b[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    uint64_t ll;
    float64 d;
 } VIS64;
 /*
 * Overlapping views of one 32-bit value: four bytes, two unsigned 16-bit
 * elements, one uint32_t, or a float32.
 */
 typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
    float32 f;
 } VIS32;
 /*
 * Interleave bytes 0-3 of DT0 (s) with bytes 0-3 of DT1 (d) and store the
 * 8-byte result in DT0: result byte 2k is d byte k, result byte 2k + 1 is
 * s byte k (byte numbers as seen through VIS_B64).
 */
 void helper_fpmerge(void)
 {
    VIS64 s, d;
    s.d = DT0;
    d.d = DT1;
    /* Reverse calculation order to handle overlap */
    /* (d is updated in place, so each d byte must be read before the
       assignment that overwrites it; going from byte 7 down ensures that) */
    d.VIS_B64(7) = s.VIS_B64(3);
    d.VIS_B64(6) = d.VIS_B64(3);
    d.VIS_B64(5) = s.VIS_B64(2);
    d.VIS_B64(4) = d.VIS_B64(2);
    d.VIS_B64(3) = s.VIS_B64(1);
    d.VIS_B64(2) = d.VIS_B64(1);
    d.VIS_B64(1) = s.VIS_B64(0);
    /* d.VIS_B64(0) = d.VIS_B64(0); */
    DT0 = d.d;
 }
 /*
 * For each 16-bit element r = 0..3: multiply the signed 16-bit element r of
 * DT1 (d) by the unsigned byte r of DT0 (s), round, and keep the product
 * shifted right by 8 in element r.  The four results are stored in DT0.
 */
 void helper_fmul8x16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    s.d = DT0;
    d.d = DT1;
 /* Rounding: add 0x100 when the low byte of the product exceeds 0x7f. */
 #define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;
    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
 #undef PMUL
    DT0 = d.d;
 }
 /*
 * Like the per-element 8x16 multiply, but every product uses the same
 * multiplicand: the signed 16-bit element 1 of DT1 (d).  It is multiplied by
 * the unsigned byte r of DT0 (s), rounded, shifted right by 8 and stored in
 * element r of the result, which goes to DT0.
 * NOTE(review): PMUL(1) overwrites d element 1, which PMUL(2) and PMUL(3)
 * then read as the multiplicand - confirm this is intended.
 */
 void helper_fmul8x16al(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    s.d = DT0;
    d.d = DT1;
 /* Rounding: add 0x100 when the low byte of the product exceeds 0x7f. */
 #define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;
    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
 #undef PMUL
    DT0 = d.d;
 }
 /*
 * Like the per-element 8x16 multiply, but every product uses the same
 * multiplicand: the signed 16-bit element 0 of DT1 (d).  It is multiplied by
 * the unsigned byte r of DT0 (s), rounded, shifted right by 8 and stored in
 * element r of the result, which goes to DT0.
 * NOTE(review): PMUL(0) overwrites d element 0, which PMUL(1)..PMUL(3) then
 * read as the multiplicand - confirm this is intended.
 */
 void helper_fmul8x16au(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    s.d = DT0;
    d.d = DT1;
 /* Rounding: add 0x100 when the low byte of the product exceeds 0x7f. */
 #define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;
    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
 #undef PMUL
    DT0 = d.d;
 }
 /*
 * For each 16-bit element r = 0..3: multiply the signed 16-bit element r of
 * DT1 (d) by the signed 16-bit element r of DT0 (s) shifted right by 8,
 * round, and keep the product shifted right by 8 in element r.  The four
 * results are stored in DT0.
 */
 void helper_fmul8sux16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    s.d = DT0;
    d.d = DT1;
 /* Rounding: add 0x100 when the low byte of the product exceeds 0x7f. */
 #define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_W64(r) = tmp >> 8;
    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
 #undef PMUL
    DT0 = d.d;
 }
 /*
 * For each 16-bit element r = 0..3: multiply the signed 16-bit element r of
 * DT1 (d) by the unsigned byte 2 * r of DT0 (s), round, and keep the product
 * shifted right by 8 in element r.  The four results are stored in DT0.
 */
 void helper_fmul8ulx16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    s.d = DT0;
    d.d = DT1;
 /* Rounding: add 0x100 when the low byte of the product exceeds 0x7f. */
 #define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_W64(r) = tmp >> 8;
    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
 #undef PMUL
    DT0 = d.d;
 }
 /*
 * For r = 1 and then r = 0: multiply the signed 16-bit element r of DT1 (d)
 * by the signed 16-bit element r of DT0 (s) shifted right by 8, round, and
 * store the whole 32-bit product in 32-bit element r of the result, which is
 * written to DT0.
 */
 void helper_fmuld8sux16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    s.d = DT0;
    d.d = DT1;
 /* Rounding: add 0x100 when the low byte of the product exceeds 0x7f. */
 #define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_L64(r) = tmp;
    /* Reverse calculation order to handle overlap */
    /* (the 32-bit result elements share storage with the 16-bit inputs of
       d, which is updated in place) */
    PMUL(1);
    PMUL(0);
 #undef PMUL
    DT0 = d.d;
 }
 /*
 * For r = 1 and then r = 0: multiply the signed 16-bit element r of DT1 (d)
 * by the unsigned byte 2 * r of DT0 (s), round, and store the whole 32-bit
 * product in 32-bit element r of the result, which is written to DT0.
 */
 void helper_fmuld8ulx16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    s.d = DT0;
    d.d = DT1;
 /* Rounding: add 0x100 when the low byte of the product exceeds 0x7f. */
 #define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_L64(r) = tmp;
    /* Reverse calculation order to handle overlap */
    /* (the 32-bit result elements share storage with the 16-bit inputs of
       d, which is updated in place) */
    PMUL(1);
    PMUL(0);
 #undef PMUL
    DT0 = d.d;
 }
 /*
 * Expand four bytes into four 16-bit elements: each byte n of the low
 * 32 bits of DT0 is shifted left by 4 and stored in 16-bit element n of the
 * result, which is written to DT0.  d is seeded from DT1, but all four of
 * its 16-bit elements are overwritten.
 */
 void helper_fexpand(void)
 {
    VIS32 s;
    VIS64 d;
    /* Only the low 32 bits of the raw DT0 storage are used as the source. */
    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
    d.d = DT1;
    d.VIS_W64(0) = s.VIS_B32(0) << 4;
    d.VIS_W64(1) = s.VIS_B32(1) << 4;
    d.VIS_W64(2) = s.VIS_B32(2) << 4;
    d.VIS_W64(3) = s.VIS_B32(3) << 4;
    DT0 = d.d;
 }
 /*
 * VIS_HELPER(name, F) generates four element-wise helpers built on the
 * two-argument operation F:
 *   name16()            - four 16-bit elements, DT0 = F(DT1 elem, DT0 elem);
 *   name16s(src1, src2) - two 16-bit elements, returns F(src2 elem, src1 elem);
 *   name32()            - two 32-bit elements, DT0 = F(DT1 elem, DT0 elem);
 *   name32s(src1, src2) - one 32-bit value, returns F(src2, src1).
 * Note the operand order: the DT1 / src2 element is always F's first argument.
 */
 #define VIS_HELPER(name, F)                             \
    void name##16(void)                                 \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.d = DT0;                                      \
        d.d = DT1;                                      \
                                                        \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
                                                        \
        DT0 = d.d;                                      \
    }                                                   \
                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
                                                        \
        return d.l;                                     \
    }                                                   \
                                                        \
    void name##32(void)                                 \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.d = DT0;                                      \
        d.d = DT1;                                      \
                                                        \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
                                                        \
        DT0 = d.d;                                      \
    }                                                   \
                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.l = F(d.l, s.l);                              \
                                                        \
        return d.l;                                     \
    }
 /* Element operations: results are truncated to the element width on store. */
 #define FADD(a, b) ((a) + (b))
 #define FSUB(a, b) ((a) - (b))
 /* Defines helper_fpadd{16,16s,32,32s} and helper_fpsub{16,16s,32,32s}. */
 VIS_HELPER(helper_fpadd, FADD)
 VIS_HELPER(helper_fpsub, FSUB)
 /*
 * VIS_CMPHELPER(name, F) generates two element-wise compare helpers that
 * return a bit mask built from the predicate F(DT0 elem, DT1 elem):
 *   name16() - four 16-bit elements; element i contributes the value 1 << i;
 *   name32() - two 32-bit elements; element i contributes the value 1 << i.
 * The mask is assembled in element 0 of d, every other element of d is
 * zeroed, and d.ll is returned.  DT0 and DT1 are not modified.
 * NOTE(review): the elements are read through the unsigned w[] / l[] views,
 * so the ordered predicates compare unsigned values - confirm that matches
 * the intended instruction semantics.
 */
 #define VIS_CMPHELPER(name, F)                                    \
    uint64_t name##16(void)                                       \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.d = DT0;                                                \
        d.d = DT1;                                                \
                                                                  \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint64_t name##32(void)                                       \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.d = DT0;                                                \
        d.d = DT1;                                                \
                                                                  \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
        d.VIS_L64(1) = 0;                                         \
                                                                  \
        return d.ll;                                              \
    }
 /* Predicates plugged into VIS_CMPHELPER; a is the DT0 element, b the DT1 one. */
 #define FCMPGT(a, b) ((a) > (b))
 #define FCMPEQ(a, b) ((a) == (b))
 #define FCMPLE(a, b) ((a) <= (b))
 #define FCMPNE(a, b) ((a) != (b))
 /* Defines helper_fcmp{gt,eq,le,ne}16 and helper_fcmp{gt,eq,le,ne}32. */
 VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
 VIS_CMPHELPER(helper_fcmple, FCMPLE)
 VIS_CMPHELPER(helper_fcmpne, FCMPNE)
 #endif
 /*
 * Fold the exception flags accumulated in env->fp_status into env->fsr.
 * If any resulting FSR_CEXC_MASK bit is also enabled in FSR_TEM_MASK (shifted
 * down by 23), FSR_FTT_IEEE_EXCP is set and TT_FP_EXCP is raised; otherwise
 * the FSR_CEXC_MASK bits are OR-ed into the field 5 bits above them.
 * Nothing is touched when no flag is pending.
 */
 void helper_check_ieee_exceptions(void)
 {
    target_ulong pending = get_float_exception_flags(&env->fp_status);
    if (!pending) {
        return;
    }
    /* Mirror each pending IEEE 754 flag into its FSR bit. */
    if (pending & float_flag_invalid) {
        env->fsr |= FSR_NVC;
    }
    if (pending & float_flag_overflow) {
        env->fsr |= FSR_OFC;
    }
    if (pending & float_flag_underflow) {
        env->fsr |= FSR_UFC;
    }
    if (pending & float_flag_divbyzero) {
        env->fsr |= FSR_DZC;
    }
    if (pending & float_flag_inexact) {
        env->fsr |= FSR_NXC;
    }
    if ((env->fsr & FSR_CEXC_MASK) & ((env->fsr & FSR_TEM_MASK) >> 23)) {
        /* At least one current exception is unmasked: trap. */
        env->fsr |= FSR_FTT_IEEE_EXCP;
        helper_raise_exception(env, TT_FP_EXCP);
    } else {
        /* All masked: accumulate the current exceptions instead. */
        env->fsr |= (env->fsr & FSR_CEXC_MASK) << 5;
    }
 }
 /* Reset the exception flags held in env->fp_status to 0. */
 void helper_clear_float_exceptions(void)
 {
    set_float_exception_flags(0, &env->fp_status);
 }
 /* Single-precision absolute value: returns the result of float32_abs(src). */
 float32 helper_fabss(float32 src)
 {
    float32 magnitude = float32_abs(src);
    return magnitude;
 }
 #ifdef TARGET_SPARC64
 /* DT0 = float64_abs(DT1).  Built only for TARGET_SPARC64. */
 void helper_fabsd(void)
 {
    DT0 = float64_abs(DT1);
 }
 /* QT0 = float128_abs(QT1).  Built only for TARGET_SPARC64. */
 void helper_fabsq(void)
 {
    QT0 = float128_abs(QT1);
 }
 #endif
 /* Single-precision square root of src, computed with env->fp_status. */
 float32 helper_fsqrts(float32 src)
 {
    float32 root = float32_sqrt(src, &env->fp_status);
    return root;
 }
 /* DT0 = float64 square root of DT1. */
 void helper_fsqrtd(void)
 {
    DT0 = float64_sqrt(DT1, &env->fp_status);
 }
 /* QT0 = float128 square root of QT1. */
 void helper_fsqrtq(void)
 {
    QT0 = float128_sqrt(QT1, &env->fp_status);
 }
 /*
 * GEN_FCMP(name, size, reg1, reg2, FS, E) defines helper_<name>(void), which
 * compares reg1 with reg2 using <size>_compare and records the outcome in the
 * two condition-code bits (FSR_FCC1 | FSR_FCC0) << FS of env->fsr:
 *   less      -> FSR_FCC0
 *   greater   -> FSR_FCC1
 *   unordered -> FSR_FCC1 | FSR_FCC0, and FSR_NVA is set as well
 *   otherwise -> both bits cleared
 * env->fsr is first masked with FSR_FTT_NMASK.  When E is non-zero, either
 * operand is a NaN and FSR_NVM is set, FSR_NVC and FSR_FTT_IEEE_EXCP are set
 * and TT_FP_EXCP is raised before the comparison is attempted.
 * NOTE(review): the unordered branch also raises TT_FP_EXCP whenever FSR_NVM
 * is set, without looking at E - confirm this is intended for E == 0.
 */
 #define GEN_FCMP(name, size, reg1, reg2, FS, E)                         \
    void glue(helper_, name) (void)                                     \
    {                                                                   \
        env->fsr &= FSR_FTT_NMASK;                                      \
        if (E && (glue(size, _is_any_nan)(reg1) ||                      \
                  glue(size, _is_any_nan)(reg2)) &&                     \
            (env->fsr & FSR_NVM)) {                                     \
            env->fsr |= FSR_NVC;                                        \
            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
            helper_raise_exception(env, TT_FP_EXCP);                    \
        }                                                               \
        switch (glue(size, _compare) (reg1, reg2, &env->fp_status)) {   \
        case float_relation_unordered:                                  \
            if ((env->fsr & FSR_NVM)) {                                 \
                env->fsr |= FSR_NVC;                                    \
                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
                helper_raise_exception(env, TT_FP_EXCP);                \
            } else {                                                    \
                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
                env->fsr |= FSR_NVA;                                    \
            }                                                           \
            break;                                                      \
        case float_relation_less:                                       \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC0 << FS;                                 \
            break;                                                      \
        case float_relation_greater:                                    \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC1 << FS;                                 \
            break;                                                      \
        default:                                                        \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            break;                                                      \
        }                                                               \
    }
 /*
 * GEN_FCMPS(name, size, FS, E): define
 * void helper_<name>(float32 src1, float32 src2), a compare whose operands
 * arrive as function arguments instead of being read from DT0/DT1 or
 * QT0/QT1. The outcome is recorded in env->fsr.
 *
 *   size  softfloat type prefix; selects <size>_is_any_nan() and
 *         <size>_compare().
 *   FS    left shift applied to the FSR_FCC0/FSR_FCC1 masks.
 *   E     non-zero for the fcmpes forms: a NaN operand is checked for
 *         before the compare is performed.
 *
 * Behaviour:
 *   - env->fsr is first masked with FSR_FTT_NMASK.
 *   - If E is set, either operand is a NaN and FSR_NVM is set, FSR_NVC and
 *     FSR_FTT_IEEE_EXCP are set and TT_FP_EXCP is raised.
 *   - unordered: with FSR_NVM set, raise the same way; otherwise set both
 *     FCC bits and FSR_NVA.
 *   - less: FCC0 only; greater: FCC1 only; default (equal): both cleared.
 *
 * NOTE(review): as in GEN_FCMP, the unordered case raises whenever FSR_NVM is
 * set regardless of E -- confirm this is intended.
 */
 #define GEN_FCMPS(name, size, FS, E)                                    \
    void glue(helper_, name)(float32 src1, float32 src2)                \
    {                                                                   \
        env->fsr &= FSR_FTT_NMASK;                                      \
        if (E && (glue(size, _is_any_nan)(src1) ||                      \
                  glue(size, _is_any_nan)(src2)) &&                     \
            (env->fsr & FSR_NVM)) {                                     \
            env->fsr |= FSR_NVC;                                        \
            env->fsr |= FSR_FTT_IEEE_EXCP;                              \
            helper_raise_exception(env, TT_FP_EXCP);                    \
        }                                                               \
        switch (glue(size, _compare) (src1, src2, &env->fp_status)) {   \
        case float_relation_unordered:                                  \
            if ((env->fsr & FSR_NVM)) {                                 \
                env->fsr |= FSR_NVC;                                    \
                env->fsr |= FSR_FTT_IEEE_EXCP;                          \
                helper_raise_exception(env, TT_FP_EXCP);                \
            } else {                                                    \
                env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);             \
                env->fsr |= (FSR_FCC1 | FSR_FCC0) << FS;                \
                env->fsr |= FSR_NVA;                                    \
            }                                                           \
            break;                                                      \
        case float_relation_less:                                       \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC0 << FS;                                 \
            break;                                                      \
        case float_relation_greater:                                    \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            env->fsr |= FSR_FCC1 << FS;                                 \
            break;                                                      \
        default:                                                        \
            env->fsr &= ~((FSR_FCC1 | FSR_FCC0) << FS);                 \
            break;                                                      \
        }                                                               \
    }
 /*
 * Compare helpers that write the unshifted FCC bits (FS = 0).
 * The fcmp* forms pass E = 0 and the fcmpe* forms pass E = 1.
 * Single precision takes its operands as arguments (GEN_FCMPS); double and
 * quad precision read DT0/DT1 and QT0/QT1 respectively (GEN_FCMP).
 */
 GEN_FCMPS(fcmps, float32, 0, 0);
 GEN_FCMP(fcmpd, float64, DT0, DT1, 0, 0);
 GEN_FCMPS(fcmpes, float32, 0, 1);
 GEN_FCMP(fcmped, float64, DT0, DT1, 0, 1);
 GEN_FCMP(fcmpq, float128, QT0, QT1, 0, 0);
 GEN_FCMP(fcmpeq, float128, QT0, QT1, 0, 1);
 static uint32_t compute_all_flags(void)
 {
    return env->psr & PSR_ICC;
@ -1580,33 +931,6 @@ int cpu_cwp_dec(CPUState *env1, int cwp)
    return ret;
 }
 #ifdef TARGET_SPARC64
 /*
 * SPARC64-only compare helpers for the additional condition-code fields.
 * The FS argument (22, 24 or 26) is the left shift applied to the
 * FSR_FCC0/FSR_FCC1 masks; the _fcc1/_fcc2/_fcc3 suffixes name the field
 * each shift selects. The first nine helpers pass E = 0 (fcmp*), the last
 * nine pass E = 1 (fcmpe*).
 */
 GEN_FCMPS(fcmps_fcc1, float32, 22, 0);
 GEN_FCMP(fcmpd_fcc1, float64, DT0, DT1, 22, 0);
 GEN_FCMP(fcmpq_fcc1, float128, QT0, QT1, 22, 0);
 GEN_FCMPS(fcmps_fcc2, float32, 24, 0);
 GEN_FCMP(fcmpd_fcc2, float64, DT0, DT1, 24, 0);
 GEN_FCMP(fcmpq_fcc2, float128, QT0, QT1, 24, 0);
 GEN_FCMPS(fcmps_fcc3, float32, 26, 0);
 GEN_FCMP(fcmpd_fcc3, float64, DT0, DT1, 26, 0);
 GEN_FCMP(fcmpq_fcc3, float128, QT0, QT1, 26, 0);
 GEN_FCMPS(fcmpes_fcc1, float32, 22, 1);
 GEN_FCMP(fcmped_fcc1, float64, DT0, DT1, 22, 1);
 GEN_FCMP(fcmpeq_fcc1, float128, QT0, QT1, 22, 1);
 GEN_FCMPS(fcmpes_fcc2, float32, 24, 1);
 GEN_FCMP(fcmped_fcc2, float64, DT0, DT1, 24, 1);
 GEN_FCMP(fcmpeq_fcc2, float128, QT0, QT1, 24, 1);
 GEN_FCMPS(fcmpes_fcc3, float32, 26, 1);
 GEN_FCMP(fcmped_fcc3, float64, DT0, DT1, 26, 1);
 GEN_FCMP(fcmpeq_fcc3, float128, QT0, QT1, 26, 1);
 #endif
 /* The single-precision generator is not needed past this point. */
 #undef GEN_FCMPS
 #if !defined(TARGET_SPARC64) && !defined(CONFIG_USER_ONLY) &&   \
    defined(DEBUG_MXCC)
 static void dump_mxcc(CPUState *env)
@ -3748,42 +3072,6 @@ void helper_stqf(target_ulong addr, int mem_idx)
 #endif
 }
 /*
 * Propagate the rounding-direction field of env->fsr into env->fp_status.
 *
 * FSR_RD_NEAREST, FSR_RD_POS and FSR_RD_NEG select nearest-even, up and
 * down; FSR_RD_ZERO and any other value select round-to-zero.
 */
 static inline void set_fsr(void)
 {
    /* Start from the fallback so only the other three modes need a case. */
    int rnd_mode = float_round_to_zero;

    switch (env->fsr & FSR_RD_MASK) {
    case FSR_RD_NEAREST:
        rnd_mode = float_round_nearest_even;
        break;
    case FSR_RD_POS:
        rnd_mode = float_round_up;
        break;
    case FSR_RD_NEG:
        rnd_mode = float_round_down;
        break;
    default:
        /* FSR_RD_ZERO and unrecognised encodings keep round-to-zero */
        break;
    }

    set_float_rounding_mode(rnd_mode, &env->fp_status);
 }
 /*
 * ldfsr: load a 32-bit value into the FSR.
 *
 * Bits selected by FSR_LDFSR_MASK come from new_fsr and bits selected by
 * FSR_LDFSR_OLDMASK keep their current value. The rounding mode is then
 * re-derived from the updated FSR.
 */
 void helper_ldfsr(uint32_t new_fsr)
 {
    /* Drop everything except the preserved bits, then merge the new ones. */
    env->fsr &= FSR_LDFSR_OLDMASK;
    env->fsr |= new_fsr & FSR_LDFSR_MASK;

    set_fsr();
 }
 #ifdef TARGET_SPARC64
 /*
 * ldxfsr: load a 64-bit value into the FSR (SPARC64 only).
 *
 * Bits selected by FSR_LDXFSR_MASK come from new_fsr and bits selected by
 * FSR_LDXFSR_OLDMASK keep their current value. The rounding mode is then
 * re-derived from the updated FSR.
 */
 void helper_ldxfsr(uint64_t new_fsr)
 {
    /* Drop everything except the preserved bits, then merge the new ones. */
    env->fsr &= FSR_LDXFSR_OLDMASK;
    env->fsr |= new_fsr & FSR_LDXFSR_MASK;

    set_fsr();
 }
 #endif
 #ifndef TARGET_SPARC64
 /* XXX: use another pointer for %iN registers to avoid slow wrapping
   handling ? */
@ -3993,37 +3281,6 @@ void helper_wrcwp(target_ulong new_cwp)
    put_cwp64(new_cwp);
 }
 /*
 * GET_FIELD(X, FROM, TO): extract the inclusive bit range FROM..TO of the
 * 64-bit value X using non-native bit numbering, where bit 0 is the most
 * significant bit and bit 63 the least significant. The field width is
 * TO - FROM + 1, so callers must pass FROM <= TO + 1.
 */
 #define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
 /*
 * GET_FIELD_SP(X, FROM, TO): same extraction using the bit order of the
 * manuals, i.e. bit 0 is 2^0. FROM is the low bit and TO the high bit of
 * the field.
 */
 #define GET_FIELD_SP(X, FROM, TO)               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
 /*
 * array8: rearrange the integer parts of the packed (x, y, z) coordinates
 * in pixel_addr into a blocked byte offset.
 *
 * Source fields (bit 0 is 2^0):
 *   x integer part: bits 11..21, y integer part: bits 33..43,
 *   z integer part: bits 55..63.
 * Result layout, from the top: upper z, upper y, upper x (each "upper"
 * x/y field is n bits wide), then middle z/y/x (4 bits each), then
 * lower z (1 bit), lower y and lower x (2 bits each).
 *
 * cubesize gives n, the width of the upper x and y fields. Values above 5
 * would reach past the 11-bit integer parts and are architecturally
 * undefined, so n is bounded to 5; this also keeps every shift count valid.
 */
 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
 {
    const target_ulong n = cubesize > 5 ? 5 : cubesize;

    /*
     * GET_FIELD_SP takes (low bit, high bit). The upper-x field used to be
     * passed as (17 + n - 1, 17), which selected the wrong bits and produced
     * a negative shift count for n > 2.
     */
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * n)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + n - 1) << (17 + n)) |
        (GET_FIELD_SP(pixel_addr, 17, 17 + n - 1) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
 }
 /*
 * alignaddr: add addr and offset, store the low three bits of the sum in
 * the low three bits of env->gsr (other GSR bits are preserved), and return
 * the sum with its low three bits cleared.
 */
 target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
 {
    const uint64_t sum = addr + offset;

    env->gsr = (env->gsr & ~7ULL) | (sum & 7ULL);

    return sum & ~7ULL;
 }
 static inline uint64_t *get_gregset(uint32_t pstate)
 {
    switch (pstate) {
--- a/target-sparc/vis_helper.c
+++ b/target-sparc/vis_helper.c
@ -0,0 +1,403 @@
 /*
 * VIS op helpers
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
 #include "cpu.h"
 #include "dyngen-exec.h"
 #include "helper.h"
 /*
 * Shorthand for the double (dt0/dt1) and quad (qt0/qt1) temporaries reached
 * through env. The helpers in this file read their operands from DT0/DT1
 * and write results back to DT0.
 */
 #define DT0 (env->dt0)
 #define DT1 (env->dt1)
 #define QT0 (env->qt0)
 #define QT1 (env->qt1)
 /*
 * GET_FIELD(X, FROM, TO): extract the inclusive bit range FROM..TO of the
 * 64-bit value X using non-native bit numbering, where bit 0 is the most
 * significant bit and bit 63 the least significant. The field width is
 * TO - FROM + 1, so callers must pass FROM <= TO + 1.
 */
 #define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
 /*
 * GET_FIELD_SP(X, FROM, TO): same extraction using the bit order of the
 * manuals, i.e. bit 0 is 2^0. FROM is the low bit and TO the high bit of
 * the field.
 */
 #define GET_FIELD_SP(X, FROM, TO)               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
 /*
 * array8: rearrange the integer parts of the packed (x, y, z) coordinates
 * in pixel_addr into a blocked byte offset.
 *
 * Source fields (bit 0 is 2^0):
 *   x integer part: bits 11..21, y integer part: bits 33..43,
 *   z integer part: bits 55..63.
 * Result layout, from the top: upper z, upper y, upper x (each "upper"
 * x/y field is n bits wide), then middle z/y/x (4 bits each), then
 * lower z (1 bit), lower y and lower x (2 bits each).
 *
 * cubesize gives n, the width of the upper x and y fields. Values above 5
 * would reach past the 11-bit integer parts and are architecturally
 * undefined, so n is bounded to 5; this also keeps every shift count valid.
 */
 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
 {
    const target_ulong n = cubesize > 5 ? 5 : cubesize;

    /*
     * GET_FIELD_SP takes (low bit, high bit). The upper-x field used to be
     * passed as (17 + n - 1, 17), which selected the wrong bits and produced
     * a negative shift count for n > 2.
     */
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * n)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + n - 1) << (17 + n)) |
        (GET_FIELD_SP(pixel_addr, 17, 17 + n - 1) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
 }
 /*
 * alignaddr: add addr and offset, store the low three bits of the sum in
 * the low three bits of env->gsr (other GSR bits are preserved), and return
 * the sum with its low three bits cleared.
 */
 target_ulong helper_alignaddr(target_ulong addr, target_ulong offset)
 {
    const uint64_t sum = addr + offset;

    env->gsr = (env->gsr & ~7ULL) | (sum & 7ULL);

    return sum & ~7ULL;
 }
 /*
 * faligndata: extract 8 bytes from the 16-byte concatenation DT0:DT1,
 * starting at the byte offset held in the low three bits of env->gsr.
 * The result is written back to DT0.
 */
 void helper_faligndata(void)
 {
    const uint64_t byte_off = env->gsr & 7;
    uint64_t result;

    result = (*((uint64_t *)&DT0)) << (byte_off * 8);
    /*
     * With a zero offset the second operand would be shifted by 64, which
     * on many architectures does nothing, so skip it in that case.
     */
    if (byte_off != 0) {
        result |= (*((uint64_t *)&DT1)) >> (64 - byte_off * 8);
    }

    *((uint64_t *)&DT0) = result;
 }
 /*
 * Lane accessors for the VIS64/VIS32 unions. Lane 0 always names the
 * least-significant element of the value: on big-endian hosts the array
 * index is mirrored, on little-endian hosts it is used directly.
 *   VIS_B64 / VIS_W64 / VIS_SW64 / VIS_L64: byte, 16-bit, signed 16-bit and
 *   32-bit lanes of a VIS64.
 *   VIS_B32 / VIS_W32: byte and 16-bit lanes of a VIS32.
 */
 #ifdef HOST_WORDS_BIGENDIAN
 #define VIS_B64(n) b[7 - (n)]
 #define VIS_W64(n) w[3 - (n)]
 #define VIS_SW64(n) sw[3 - (n)]
 #define VIS_L64(n) l[1 - (n)]
 #define VIS_B32(n) b[3 - (n)]
 #define VIS_W32(n) w[1 - (n)]
 #else
 #define VIS_B64(n) b[n]
 #define VIS_W64(n) w[n]
 #define VIS_SW64(n) sw[n]
 #define VIS_L64(n) l[n]
 #define VIS_B32(n) b[n]
 #define VIS_W32(n) w[n]
 #endif
 /*
 * Overlay of one 64-bit value as partitioned lanes. Use the VIS_*64
 * accessors rather than indexing the arrays directly so that lane
 * numbering does not depend on host byte order.
 */
 typedef union {
    uint8_t b[8];   /* eight bytes */
    uint16_t w[4];  /* four unsigned 16-bit lanes */
    int16_t sw[4];  /* the same four lanes viewed as signed */
    uint32_t l[2];  /* two 32-bit lanes */
    uint64_t ll;    /* the whole value as an integer */
    float64 d;      /* the whole value as held in DT0/DT1 */
 } VIS64;
 /*
 * Overlay of one 32-bit value as partitioned lanes. Use the VIS_*32
 * accessors so that lane numbering does not depend on host byte order.
 */
 typedef union {
    uint8_t b[4];   /* four bytes */
    uint16_t w[2];  /* two unsigned 16-bit lanes */
    uint32_t l;     /* the whole value as an integer */
    float32 f;      /* the whole value as a float32 */
 } VIS32;
 /*
 * fpmerge: interleave the four low bytes of DT0 with the four low bytes of
 * DT1. Result byte 2*i+1 comes from DT0 byte i and result byte 2*i from
 * DT1 byte i (byte 0 is the least significant). The result goes to DT0.
 */
 void helper_fpmerge(void)
 {
    VIS64 s, d;
    int i;

    s.d = DT0;
    d.d = DT1;

    /*
     * d is both a source and the destination, so walk from the top byte
     * down: each d byte is read before a later iteration overwrites it.
     * For i == 0 the second assignment copies byte 0 onto itself.
     */
    for (i = 3; i >= 0; i--) {
        d.VIS_B64(2 * i + 1) = s.VIS_B64(i);
        d.VIS_B64(2 * i) = d.VIS_B64(i);
    }

    DT0 = d.d;
 }
 /*
 * fmul8x16: for each of the four lanes, multiply the signed 16-bit lane of
 * DT1 by the unsigned byte lane of DT0 with the same index, round the
 * product at bit 8 and keep the next 16 bits. The result goes to DT0.
 */
 void helper_fmul8x16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    int lane;

    s.d = DT0;
    d.d = DT1;

    for (lane = 0; lane < 4; lane++) {
        tmp = (int32_t)d.VIS_SW64(lane) * (int32_t)s.VIS_B64(lane);
        /* round to nearest: bump when the discarded byte is >= 0x80 */
        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        d.VIS_W64(lane) = tmp >> 8;
    }

    DT0 = d.d;
 }
 /*
 * fmul8x16al: multiply each of the four low bytes of DT0 (unsigned) by one
 * signed 16-bit scalar taken from lane 1 of DT1, round the product at bit 8
 * and keep the next 16 bits. The four results go to the 16-bit lanes of DT0.
 */
 void helper_fmul8x16al(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    int32_t scale;
    int lane;

    s.d = DT0;
    d.d = DT1;

    /*
     * Latch the scalar before any result is stored: d is also the
     * destination, and writing result lane 1 would otherwise replace the
     * multiplier used for lanes 2 and 3.
     */
    scale = d.VIS_SW64(1);

    for (lane = 0; lane < 4; lane++) {
        tmp = scale * (int32_t)s.VIS_B64(lane);
        /* round to nearest: bump when the discarded byte is >= 0x80 */
        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        d.VIS_W64(lane) = tmp >> 8;
    }

    DT0 = d.d;
 }
 /*
 * fmul8x16au: multiply each of the four low bytes of DT0 (unsigned) by one
 * signed 16-bit scalar taken from lane 0 of DT1, round the product at bit 8
 * and keep the next 16 bits. The four results go to the 16-bit lanes of DT0.
 */
 void helper_fmul8x16au(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    int32_t scale;
    int lane;

    s.d = DT0;
    d.d = DT1;

    /*
     * Latch the scalar before any result is stored: d is also the
     * destination, and writing result lane 0 would otherwise replace the
     * multiplier used for lanes 1 to 3.
     */
    scale = d.VIS_SW64(0);

    for (lane = 0; lane < 4; lane++) {
        tmp = scale * (int32_t)s.VIS_B64(lane);
        /* round to nearest: bump when the discarded byte is >= 0x80 */
        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        d.VIS_W64(lane) = tmp >> 8;
    }

    DT0 = d.d;
 }
 /*
 * fmul8sux16: for each of the four 16-bit lanes, multiply the signed lane
 * of DT1 by the signed upper byte of the matching DT0 lane (the lane
 * arithmetically shifted right by 8), round the product at bit 8 and keep
 * the next 16 bits. The result goes to DT0.
 */
 void helper_fmul8sux16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    int lane;

    s.d = DT0;
    d.d = DT1;

    for (lane = 0; lane < 4; lane++) {
        tmp = (int32_t)d.VIS_SW64(lane) * ((int32_t)s.VIS_SW64(lane) >> 8);
        /* round to nearest: bump when the discarded byte is >= 0x80 */
        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        d.VIS_W64(lane) = tmp >> 8;
    }

    DT0 = d.d;
 }
 /*
 * fmul8ulx16: for each of the four 16-bit lanes, multiply the signed lane
 * of DT1 by the unsigned lower byte of the matching DT0 lane (byte 2*lane),
 * round the product at bit 8 and keep the next 16 bits. The result goes to
 * DT0.
 */
 void helper_fmul8ulx16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    int lane;

    s.d = DT0;
    d.d = DT1;

    for (lane = 0; lane < 4; lane++) {
        tmp = (int32_t)d.VIS_SW64(lane) * ((uint32_t)s.VIS_B64(lane * 2));
        /* round to nearest: bump when the discarded byte is >= 0x80 */
        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        d.VIS_W64(lane) = tmp >> 8;
    }

    DT0 = d.d;
 }
 /*
 * fmuld8sux16: for the two low 16-bit lanes, multiply the signed lane of
 * DT1 by the signed upper byte of the matching DT0 lane, apply the bit-8
 * rounding increment and store the full 32-bit product in the 32-bit lane
 * with the same index. The result goes to DT0.
 */
 void helper_fmuld8sux16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    int lane;

    s.d = DT0;
    d.d = DT1;

    /*
     * Process lane 1 first: the 32-bit result for lane 0 overlaps 16-bit
     * source lane 1 of d, which must be read before it is overwritten.
     */
    for (lane = 1; lane >= 0; lane--) {
        tmp = (int32_t)d.VIS_SW64(lane) * ((int32_t)s.VIS_SW64(lane) >> 8);
        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        d.VIS_L64(lane) = tmp;
    }

    DT0 = d.d;
 }
 /*
 * fmuld8ulx16: for the two low 16-bit lanes, multiply the signed lane of
 * DT1 by the unsigned lower byte of the matching DT0 lane (byte 2*lane),
 * apply the bit-8 rounding increment and store the full 32-bit product in
 * the 32-bit lane with the same index. The result goes to DT0.
 */
 void helper_fmuld8ulx16(void)
 {
    VIS64 s, d;
    uint32_t tmp;
    int lane;

    s.d = DT0;
    d.d = DT1;

    /*
     * Process lane 1 first: the 32-bit result for lane 0 overlaps 16-bit
     * source lane 1 of d, which must be read before it is overwritten.
     */
    for (lane = 1; lane >= 0; lane--) {
        tmp = (int32_t)d.VIS_SW64(lane) * ((uint32_t)s.VIS_B64(lane * 2));
        if ((tmp & 0xff) > 0x7f) {
            tmp += 0x100;
        }
        d.VIS_L64(lane) = tmp;
    }

    DT0 = d.d;
 }
 /*
 * fexpand: widen the four bytes in the low 32 bits of DT0 into four 16-bit
 * lanes, each byte shifted left by 4. The result goes to DT0.
 */
 void helper_fexpand(void)
 {
    VIS32 s;
    VIS64 d;
    int lane;

    s.l = (uint32_t)(*(uint64_t *)&DT0 & 0xffffffff);
    d.d = DT1;

    /* every lane of d is overwritten; s is a separate copy, so order is free */
    for (lane = 0; lane < 4; lane++) {
        d.VIS_W64(lane) = s.VIS_B32(lane) << 4;
    }

    DT0 = d.d;
 }
 #define VIS_HELPER(name, F)                             \
    void name##16(void)                                 \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.d = DT0;                                      \
        d.d = DT1;                                      \
                                                        \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
                                                        \
        DT0 = d.d;                                      \
    }                                                   \
                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
                                                        \
        return d.l;                                     \
    }                                                   \
                                                        \
    void name##32(void)                                 \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.d = DT0;                                      \
        d.d = DT1;                                      \
                                                        \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
                                                        \
        DT0 = d.d;                                      \
    }                                                   \
                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.l = F(d.l, s.l);                              \
                                                        \
        return d.l;                                     \
    }
 #define FADD(a, b) ((a) + (b))
 #define FSUB(a, b) ((a) - (b))
 VIS_HELPER(helper_fpadd, FADD)
 VIS_HELPER(helper_fpsub, FSUB)
 #define VIS_CMPHELPER(name, F)                                    \
    uint64_t name##16(void)                                       \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.d = DT0;                                                \
        d.d = DT1;                                                \
                                                                  \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint64_t name##32(void)                                       \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.d = DT0;                                                \
        d.d = DT1;                                                \
                                                                  \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
        d.VIS_L64(1) = 0;                                         \
                                                                  \
        return d.ll;                                              \
    }
 #define FCMPGT(a, b) ((a) > (b))
 #define FCMPEQ(a, b) ((a) == (b))
 #define FCMPLE(a, b) ((a) <= (b))
 #define FCMPNE(a, b) ((a) != (b))
 VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
 VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
 VIS_CMPHELPER(helper_fcmple, FCMPLE)
 VIS_CMPHELPER(helper_fcmpne, FCMPNE)