qemu/target/ppc/translate/vmx-impl.c.inc

/*
 * translate/vmx-impl.c
 *
 * Altivec/VMX translation
 */

/***                      Altivec vector extension                         ***/
/* Altivec register moves */

/*
 * Allocate a TCG pointer temporary set to cpu_env plus the offset that
 * avr_full_offset() reports for register number 'reg'.
 *
 * The temporary is handed to the caller; the users in this file release
 * it with tcg_temp_free_ptr().
 */
static inline TCGv_ptr gen_avr_ptr(int reg)
{
    TCGv_ptr avr_ptr;

    avr_ptr = tcg_temp_new_ptr();
    tcg_gen_addi_ptr(avr_ptr, cpu_env, avr_full_offset(reg));
    return avr_ptr;
}

/*
 * GEN_VR_LDX(name, opc2, opc3): define gen_<name>(), the translator for a
 * vector load performed as two 64-bit loads at EA and EA + 8, with EA
 * rounded down to a multiple of 16.  The destination is the vector
 * register selected by rD.  opc2 and opc3 are not used by the generated
 * function.
 */
#define GEN_VR_LDX(name, opc2, opc3)                                    \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    /* In LE mode the lower-addressed doubleword is the low half. */    \
    const bool first_is_high = !ctx->le_mode;                           \
    TCGv_i64 dword;                                                     \
    TCGv addr;                                                          \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    gen_set_access_type(ctx, ACCESS_INT);                               \
    dword = tcg_temp_new_i64();                                         \
    addr = tcg_temp_new();                                              \
    gen_addr_reg_index(ctx, addr);                                      \
    /* Force the effective address down to a 16-byte boundary. */       \
    tcg_gen_andi_tl(addr, addr, ~0xf);                                  \
    /*                                                                  \
     * Only the order of the two halves depends on endianness;          \
     * gen_qemu_ld64_i64 already byteswaps within each doubleword.      \
     */                                                                 \
    gen_qemu_ld64_i64(ctx, dword, addr);                                \
    set_avr64(rD(ctx->opcode), dword, first_is_high);                   \
    tcg_gen_addi_tl(addr, addr, 8);                                     \
    gen_qemu_ld64_i64(ctx, dword, addr);                                \
    set_avr64(rD(ctx->opcode), dword, !first_is_high);                  \
    tcg_temp_free(addr);                                                \
    tcg_temp_free_i64(dword);                                           \
}

/*
 * GEN_VR_STX(name, opc2, opc3): define gen_st<name>(), the translator for
 * a vector store performed as two 64-bit stores at EA and EA + 8, with EA
 * rounded down to a multiple of 16.  The source is the vector register
 * selected by rD(ctx->opcode).  opc2 and opc3 are not used by the
 * generated function.
 */
#define GEN_VR_STX(name, opc2, opc3)                                    \
static void gen_st##name(DisasContext *ctx)                             \
{                                                                       \
    /* In LE mode the low half goes to the lower address. */            \
    const bool first_is_high = !ctx->le_mode;                           \
    TCGv_i64 dword;                                                     \
    TCGv addr;                                                          \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    gen_set_access_type(ctx, ACCESS_INT);                               \
    dword = tcg_temp_new_i64();                                         \
    addr = tcg_temp_new();                                              \
    gen_addr_reg_index(ctx, addr);                                      \
    /* Force the effective address down to a 16-byte boundary. */       \
    tcg_gen_andi_tl(addr, addr, ~0xf);                                  \
    /*                                                                  \
     * Only the order of the two halves depends on endianness;          \
     * gen_qemu_st64_i64 already byteswaps within each doubleword.      \
     */                                                                 \
    get_avr64(dword, rD(ctx->opcode), first_is_high);                   \
    gen_qemu_st64_i64(ctx, dword, addr);                                \
    tcg_gen_addi_tl(addr, addr, 8);                                     \
    get_avr64(dword, rD(ctx->opcode), !first_is_high);                  \
    gen_qemu_st64_i64(ctx, dword, addr);                                \
    tcg_temp_free(addr);                                                \
    tcg_temp_free_i64(dword);                                           \
}

/*
 * GEN_VR_LVE(name, opc2, opc3, size): define gen_lve<name>(), which
 * computes the effective address, rounds it down to a multiple of 'size'
 * and passes it, together with a pointer to the vector register selected
 * by rS(ctx->opcode), to gen_helper_lve<name>().
 *
 * The mask ~(size - 1) only rounds down correctly when 'size' is a power
 * of two.  'size' is parenthesized at each use so that an expression
 * argument cannot be mis-parsed.  opc2 and opc3 are not used by the
 * generated function.
 */
#define GEN_VR_LVE(name, opc2, opc3, size)                              \
static void gen_lve##name(DisasContext *ctx)                            \
{                                                                       \
    TCGv EA;                                                            \
    TCGv_ptr rs;                                                        \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    gen_set_access_type(ctx, ACCESS_INT);                               \
    EA = tcg_temp_new();                                                \
    gen_addr_reg_index(ctx, EA);                                        \
    /* Byte accesses need no masking; wider ones round EA down. */      \
    if ((size) > 1) {                                                   \
        tcg_gen_andi_tl(EA, EA, ~((size) - 1));                         \
    }                                                                   \
    rs = gen_avr_ptr(rS(ctx->opcode));                                  \
    gen_helper_lve##name(cpu_env, rs, EA);                              \
    tcg_temp_free(EA);                                                  \
    tcg_temp_free_ptr(rs);                                              \
}

/*
 * GEN_VR_STVE(name, opc2, opc3, size): define gen_stve<name>(), which
 * computes the effective address, rounds it down to a multiple of 'size'
 * and passes it, together with a pointer to the vector register selected
 * by rS(ctx->opcode), to gen_helper_stve<name>().
 *
 * The mask ~(size - 1) only rounds down correctly when 'size' is a power
 * of two.  'size' is parenthesized at each use so that an expression
 * argument cannot be mis-parsed.  opc2 and opc3 are not used by the
 * generated function.
 */
#define GEN_VR_STVE(name, opc2, opc3, size)                             \
static void gen_stve##name(DisasContext *ctx)                           \
{                                                                       \
    TCGv EA;                                                            \
    TCGv_ptr rs;                                                        \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    gen_set_access_type(ctx, ACCESS_INT);                               \
    EA = tcg_temp_new();                                                \
    gen_addr_reg_index(ctx, EA);                                        \
    /* Byte accesses need no masking; wider ones round EA down. */      \
    if ((size) > 1) {                                                   \
        tcg_gen_andi_tl(EA, EA, ~((size) - 1));                         \
    }                                                                   \
    rs = gen_avr_ptr(rS(ctx->opcode));                                  \
    gen_helper_stve##name(cpu_env, rs, EA);                             \
    tcg_temp_free(EA);                                                  \
    tcg_temp_free_ptr(rs);                                              \
}

/* Whole-register loads: generates gen_lvx() and gen_lvxl(). */
GEN_VR_LDX(lvx, 0x07, 0x03);
/* As we don't emulate the cache, lvxl is strictly equivalent to lvx */
GEN_VR_LDX(lvxl, 0x07, 0x0B);

/* Element loads with access sizes 1, 2 and 4: gen_lvebx/lvehx/lvewx(). */
GEN_VR_LVE(bx, 0x07, 0x00, 1);
GEN_VR_LVE(hx, 0x07, 0x01, 2);
GEN_VR_LVE(wx, 0x07, 0x02, 4);

/*
 * Whole-register stores.  GEN_VR_STX prefixes the name with "gen_st", so
 * these generate gen_stsvx() and gen_stsvxl().
 */
GEN_VR_STX(svx, 0x07, 0x07);
/* As we don't emulate the cache, stvxl is strictly equivalent to stvx */
GEN_VR_STX(svxl, 0x07, 0x0F);

/* Element stores with access sizes 1, 2 and 4: gen_stvebx/stvehx/stvewx(). */
GEN_VR_STVE(bx, 0x07, 0x04, 1);
GEN_VR_STVE(hx, 0x07, 0x05, 2);
GEN_VR_STVE(wx, 0x07, 0x06, 4);

/*
 * mfvscr: store the 32-bit value produced by gen_helper_mfvscr(),
 * zero-extended to 64 bits, in the low doubleword of the vector register
 * selected by rD, and clear that register's high doubleword.
 */
static void gen_mfvscr(DisasContext *ctx)
{
    TCGv_i64 dword;
    TCGv_i32 vscr;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    dword = tcg_temp_new_i64();
    vscr = tcg_temp_new_i32();

    /* The high doubleword is all zeroes. */
    tcg_gen_movi_i64(dword, 0);
    set_avr64(rD(ctx->opcode), dword, true);

    /* The low doubleword carries the zero-extended helper result. */
    gen_helper_mfvscr(vscr, cpu_env);
    tcg_gen_extu_i32_i64(dword, vscr);
    set_avr64(rD(ctx->opcode), dword, false);

    tcg_temp_free_i64(dword);
    tcg_temp_free_i32(vscr);
}

/*
 * mtvscr: load one 32-bit word of the vector register selected by rB
 * directly from cpu_env and pass it to gen_helper_mtvscr().
 */
static void gen_mtvscr(DisasContext *ctx)
{
    TCGv_i32 word;
    int word_ofs;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    word = tcg_temp_new_i32();

    /*
     * Start at the register's base offset.  On big-endian hosts the word
     * that is read is the fourth 32-bit word of the register storage; on
     * little-endian hosts it is the first.
     */
    word_ofs = avr_full_offset(rB(ctx->opcode));
#if HOST_BIG_ENDIAN
    word_ofs += 3 * 4;
#endif

    tcg_gen_ld_i32(word, cpu_env, word_ofs);
    gen_helper_mtvscr(cpu_env, word);
    tcg_temp_free_i32(word);
}

#define GEN_VX_VMUL10(name, add_cin, ret_carry)                         \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_i64 t0;                                                        \
    TCGv_i64 t1;                                                        \
    TCGv_i64 t2;                                                        \
    TCGv_i64 avr;                                                       \
    TCGv_i64 ten, z;                                                    \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    t0 = tcg_temp_new_i64();                                            \
    t1 = tcg_temp_new_i64();                                            \
    t2 = tcg_temp_new_i64();                                            \
    avr = tcg_temp_new_i64();                                           \
    ten = tcg_const_i64(10);                                            \
    z = tcg_const_i64(0);                                               \
                                                                        \
    if (add_cin) {                                                      \
        get_avr64(avr, rA(ctx->opcode), false);                         \
        tcg_gen_mulu2_i64(t0, t1, avr, ten);                            \
        get_avr64(avr, rB(ctx->opcode), false);                         \
        tcg_gen_andi_i64(t2, avr, 0xF);                                 \
        tcg_gen_add2_i64(avr, t2, t0, t1, t2, z);                       \
        set_avr64(rD(ctx->opcode), avr, false);                         \
    } else {                                                            \
        get_avr64(avr, rA(ctx->opcode), false);                         \
        tcg_gen_mulu2_i64(avr, t2, avr, ten);                           \
        set_avr64(rD(ctx->opcode), avr, false);                         \
    }                                                                   \
                                                                        \
    if (ret_carry) {                                                    \
        get_avr64(avr, rA(ctx->opcode), true);                          \
        tcg_gen_mulu2_i64(t0, t1, avr, ten);                            \
        tcg_gen_add2_i64(t0, avr, t0, t1, t2, z);                       \
        set_avr64(rD(ctx->opcode), avr, false);                         \
        set_avr64(rD(ctx->opcode), z, true);                            \
    } else {                                                            \
        get_avr64(avr, rA(ctx->opcode), true);                          \
        tcg_gen_mul_i64(t0, avr, ten);                                  \
        tcg_gen_add_i64(avr, t0, t2);                                   \
        set_avr64(rD(ctx->opcode), avr, true);                          \
    }                                                                   \
                                                                        \
    tcg_temp_free_i64(t0);                                              \
    tcg_temp_free_i64(t1);                                              \
    tcg_temp_free_i64(t2);                                              \
    tcg_temp_free_i64(avr);                                             \
    tcg_temp_free_i64(ten);                                             \
    tcg_temp_free_i64(z);                                               \
}                                                                       \

GEN_VX_VMUL10(vmul10uq, 0, 0);
GEN_VX_VMUL10(vmul10euq, 1, 0);
GEN_VX_VMUL10(vmul10cuq, 0, 1);
GEN_VX_VMUL10(vmul10ecuq, 1, 1);

/*
 * GEN_VXFORM_V(name, vece, tcg_op, opc2, opc3): define gen_<name>(),
 * which invokes 'tcg_op' with element size 'vece' on the cpu_env offsets
 * of the rD, rA and rB vector registers; both trailing size arguments
 * are 16.  opc2 and opc3 are not used by the generated function.
 */
#define GEN_VXFORM_V(name, vece, tcg_op, opc2, opc3)                    \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
    } else {                                                            \
        /* Destination first, then the two sources. */                  \
        tcg_op(vece,                                                    \
               avr_full_offset(rD(ctx->opcode)),                        \
               avr_full_offset(rA(ctx->opcode)),                        \
               avr_full_offset(rB(ctx->opcode)),                        \
               16, 16);                                                 \
    }                                                                   \
}

/*
 * Logical operations
 *
 * Each is emitted through GEN_VXFORM_V with the matching tcg_gen_gvec_*
 * operation and MO_64 as the element size.
 */
GEN_VXFORM_V(vand, MO_64, tcg_gen_gvec_and, 2, 16);
GEN_VXFORM_V(vandc, MO_64, tcg_gen_gvec_andc, 2, 17);
GEN_VXFORM_V(vor, MO_64, tcg_gen_gvec_or, 2, 18);
GEN_VXFORM_V(vxor, MO_64, tcg_gen_gvec_xor, 2, 19);
GEN_VXFORM_V(vnor, MO_64, tcg_gen_gvec_nor, 2, 20);
GEN_VXFORM_V(veqv, MO_64, tcg_gen_gvec_eqv, 2, 26);
GEN_VXFORM_V(vnand, MO_64, tcg_gen_gvec_nand, 2, 22);
GEN_VXFORM_V(vorc, MO_64, tcg_gen_gvec_orc, 2, 21);

/*
 * GEN_VXFORM(name, opc2, opc3): define gen_<name>(), which calls
 * gen_helper_<name>() with pointers to the rD, rA and rB vector
 * registers (in that order).  opc2 and opc3 are not used by the
 * generated function.
 */
#define GEN_VXFORM(name, opc2, opc3)                                    \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src_a, src_b, dst;                                         \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    /* Pass the helper pointers to rD, rA and rB. */                    \
    src_a = gen_avr_ptr(rA(ctx->opcode));                               \
    src_b = gen_avr_ptr(rB(ctx->opcode));                               \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(dst, src_a, src_b);                               \
    tcg_temp_free_ptr(src_a);                                           \
    tcg_temp_free_ptr(src_b);                                           \
    tcg_temp_free_ptr(dst);                                             \
}

/*
 * GEN_VXFORM_TRANS(name, opc2, opc3): define gen_<name>(), a wrapper that
 * raises POWERPC_EXCP_VPU when Altivec is disabled and otherwise calls
 * trans_<name>().  opc2 and opc3 are not used by the generated function.
 */
#define GEN_VXFORM_TRANS(name, opc2, opc3)                              \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
    } else {                                                            \
        trans_##name(ctx);                                              \
    }                                                                   \
}

/*
 * GEN_VXFORM_ENV(name, opc2, opc3): define gen_<name>(), which calls
 * gen_helper_<name>() with cpu_env followed by pointers to the rD, rA
 * and rB vector registers.  opc2 and opc3 are not used by the generated
 * function.
 */
#define GEN_VXFORM_ENV(name, opc2, opc3)                                \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src_a, src_b, dst;                                         \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    /* Pass the helper cpu_env and pointers to rD, rA and rB. */        \
    src_a = gen_avr_ptr(rA(ctx->opcode));                               \
    src_b = gen_avr_ptr(rB(ctx->opcode));                               \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(cpu_env, dst, src_a, src_b);                      \
    tcg_temp_free_ptr(src_a);                                           \
    tcg_temp_free_ptr(src_b);                                           \
    tcg_temp_free_ptr(dst);                                             \
}

/*
 * GEN_VXFORM3(name, opc2, opc3): define gen_<name>(), which calls
 * gen_helper_<name>() with pointers to the rD, rA, rB and rC vector
 * registers (in that order).  opc2 and opc3 are not used by the
 * generated function.
 */
#define GEN_VXFORM3(name, opc2, opc3)                                   \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src_a, src_b, src_c, dst;                                  \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    /* Pass the helper pointers to rD, rA, rB and rC. */                \
    src_a = gen_avr_ptr(rA(ctx->opcode));                               \
    src_b = gen_avr_ptr(rB(ctx->opcode));                               \
    src_c = gen_avr_ptr(rC(ctx->opcode));                               \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(dst, src_a, src_b, src_c);                        \
    tcg_temp_free_ptr(src_a);                                           \
    tcg_temp_free_ptr(src_b);                                           \
    tcg_temp_free_ptr(src_c);                                           \
    tcg_temp_free_ptr(dst);                                             \
}

/*
 * Support for Altivec instruction pairs that use bit 31 (Rc) as
 * an opcode bit.  In general, these pairs come from different
 * versions of the ISA, so we must also support a pair of flags for
 * each instruction.
 *
 * Defines gen_<name0>_<name1>():
 *   - Rc == 0 and (flg0 set in insns_flags or flg2_0 set in insns_flags2)
 *     dispatches to gen_<name0>();
 *   - Rc == 1 and (flg1 set in insns_flags or flg2_1 set in insns_flags2)
 *     dispatches to gen_<name1>();
 *   - anything else raises POWERPC_EXCP_INVAL_INVAL.
 *
 * The flag arguments are parenthesized at each use so that an argument
 * such as (A | B) is masked as a whole.
 */
#define GEN_VXFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)       \
static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
{                                                                       \
    if ((Rc(ctx->opcode) == 0) &&                                       \
        ((ctx->insns_flags & (flg0)) ||                                 \
         (ctx->insns_flags2 & (flg2_0)))) {                             \
        gen_##name0(ctx);                                               \
    } else if ((Rc(ctx->opcode) == 1) &&                                \
               ((ctx->insns_flags & (flg1)) ||                          \
                (ctx->insns_flags2 & (flg2_1)))) {                      \
        gen_##name1(ctx);                                               \
    } else {                                                            \
        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);             \
    }                                                                   \
}

/*
 * We use this macro if one instruction is realized with direct
 * translation, and second one with helper.
 *
 * Defines gen_<name0>_<name1>(), selecting on the Rc bit and the
 * instruction flags.  The Rc == 0 instruction is emitted by calling
 * trans_<name0>() directly, so the Altivec-enabled check is done here
 * first; the Rc == 1 instruction goes through gen_<name1>().  Anything
 * else raises POWERPC_EXCP_INVAL_INVAL.
 *
 * The flag arguments are parenthesized at each use so that an argument
 * such as (A | B) is masked as a whole.
 */
#define GEN_VXFORM_TRANS_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1) \
static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
{                                                                       \
    if ((Rc(ctx->opcode) == 0) &&                                       \
        ((ctx->insns_flags & (flg0)) ||                                 \
         (ctx->insns_flags2 & (flg2_0)))) {                             \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        trans_##name0(ctx);                                             \
    } else if ((Rc(ctx->opcode) == 1) &&                                \
               ((ctx->insns_flags & (flg1)) ||                          \
                (ctx->insns_flags2 & (flg2_1)))) {                      \
        gen_##name1(ctx);                                               \
    } else {                                                            \
        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);             \
    }                                                                   \
}

/*
 * Adds support to provide invalid mask
 *
 * Same dispatch on the Rc bit and instruction flags as GEN_VXFORM_DUAL,
 * with one extra condition per instruction: the opcode must have no bit
 * of inval0 (for name0) or inval1 (for name1) set, otherwise
 * POWERPC_EXCP_INVAL_INVAL is raised.
 *
 * The flag and mask arguments are parenthesized at each use so that an
 * argument such as (A | B) is masked as a whole.
 */
#define GEN_VXFORM_DUAL_EXT(name0, flg0, flg2_0, inval0,                \
                            name1, flg1, flg2_1, inval1)                \
static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
{                                                                       \
    if ((Rc(ctx->opcode) == 0) &&                                       \
        ((ctx->insns_flags & (flg0)) ||                                 \
         (ctx->insns_flags2 & (flg2_0))) &&                             \
        !(ctx->opcode & (inval0))) {                                    \
        gen_##name0(ctx);                                               \
    } else if ((Rc(ctx->opcode) == 1) &&                                \
               ((ctx->insns_flags & (flg1)) ||                          \
                (ctx->insns_flags2 & (flg2_1))) &&                      \
               !(ctx->opcode & (inval1))) {                             \
        gen_##name1(ctx);                                               \
    } else {                                                            \
        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);             \
    }                                                                   \
}

/*
 * GEN_VXFORM_HETRO(name, opc2, opc3): define gen_<name>() for
 * instructions with mixed operand kinds: gen_helper_<name>() is called
 * with the rD and rA general-purpose registers and a pointer to the rB
 * vector register.  opc2 and opc3 are not used by the generated function.
 */
#define GEN_VXFORM_HETRO(name, opc2, opc3)                              \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr vb_ptr;                                                    \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    vb_ptr = gen_avr_ptr(rB(ctx->opcode));                              \
    gen_helper_##name(cpu_gpr[rD(ctx->opcode)],                         \
                      cpu_gpr[rA(ctx->opcode)],                         \
                      vb_ptr);                                          \
    tcg_temp_free_ptr(vb_ptr);                                          \
}

/*
 * Integer add/subtract and min/max are emitted as gvec operations via
 * GEN_VXFORM_V, one instance per element size (MO_8 .. MO_64).
 *
 * vaddubm and vadduhm are paired with vmul10cuq and vmul10ecuq
 * (PPC2_ISA300): the Rc bit of the opcode selects between the two.
 */
GEN_VXFORM_V(vaddubm, MO_8, tcg_gen_gvec_add, 0, 0);
GEN_VXFORM_DUAL_EXT(vaddubm, PPC_ALTIVEC, PPC_NONE, 0,       \
                    vmul10cuq, PPC_NONE, PPC2_ISA300, 0x0000F800)
GEN_VXFORM_V(vadduhm, MO_16, tcg_gen_gvec_add, 0, 1);
GEN_VXFORM_DUAL(vadduhm, PPC_ALTIVEC, PPC_NONE,  \
                vmul10ecuq, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_V(vadduwm, MO_32, tcg_gen_gvec_add, 0, 2);
GEN_VXFORM_V(vaddudm, MO_64, tcg_gen_gvec_add, 0, 3);
GEN_VXFORM_V(vsububm, MO_8, tcg_gen_gvec_sub, 0, 16);
GEN_VXFORM_V(vsubuhm, MO_16, tcg_gen_gvec_sub, 0, 17);
GEN_VXFORM_V(vsubuwm, MO_32, tcg_gen_gvec_sub, 0, 18);
GEN_VXFORM_V(vsubudm, MO_64, tcg_gen_gvec_sub, 0, 19);
GEN_VXFORM_V(vmaxub, MO_8, tcg_gen_gvec_umax, 1, 0);
GEN_VXFORM_V(vmaxuh, MO_16, tcg_gen_gvec_umax, 1, 1);
GEN_VXFORM_V(vmaxuw, MO_32, tcg_gen_gvec_umax, 1, 2);
GEN_VXFORM_V(vmaxud, MO_64, tcg_gen_gvec_umax, 1, 3);
GEN_VXFORM_V(vmaxsb, MO_8, tcg_gen_gvec_smax, 1, 4);
GEN_VXFORM_V(vmaxsh, MO_16, tcg_gen_gvec_smax, 1, 5);
GEN_VXFORM_V(vmaxsw, MO_32, tcg_gen_gvec_smax, 1, 6);
GEN_VXFORM_V(vmaxsd, MO_64, tcg_gen_gvec_smax, 1, 7);
GEN_VXFORM_V(vminub, MO_8, tcg_gen_gvec_umin, 1, 8);
GEN_VXFORM_V(vminuh, MO_16, tcg_gen_gvec_umin, 1, 9);
GEN_VXFORM_V(vminuw, MO_32, tcg_gen_gvec_umin, 1, 10);
GEN_VXFORM_V(vminud, MO_64, tcg_gen_gvec_umin, 1, 11);
GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12);
GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13);
GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14);
GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15);
/*
 * The remaining instructions call out-of-line helpers (GEN_VXFORM).
 * Each vavgu{b,h,w} is paired with the PPC2_ISA300 instruction
 * vabsdu{b,h,w}, again selected by the Rc bit.
 */
GEN_VXFORM(vavgub, 1, 16);
GEN_VXFORM(vabsdub, 1, 16);
GEN_VXFORM_DUAL(vavgub, PPC_ALTIVEC, PPC_NONE, \
                vabsdub, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vavguh, 1, 17);
GEN_VXFORM(vabsduh, 1, 17);
GEN_VXFORM_DUAL(vavguh, PPC_ALTIVEC, PPC_NONE, \
                vabsduh, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vavguw, 1, 18);
GEN_VXFORM(vabsduw, 1, 18);
GEN_VXFORM_DUAL(vavguw, PPC_ALTIVEC, PPC_NONE, \
                vabsduw, PPC_NONE, PPC2_ISA300)
GEN_VXFORM(vavgsb, 1, 20);
GEN_VXFORM(vavgsh, 1, 21);
GEN_VXFORM(vavgsw, 1, 22);
GEN_VXFORM(vmrghb, 6, 0);
GEN_VXFORM(vmrghh, 6, 1);
GEN_VXFORM(vmrghw, 6, 2);
GEN_VXFORM(vmrglb, 6, 4);
GEN_VXFORM(vmrglh, 6, 5);
GEN_VXFORM(vmrglw, 6, 6);

/*
 * vmrgew: for each doubleword of the result, the upper 32 bits come from
 * the upper 32 bits of vA's doubleword and the lower 32 bits come from
 * the upper 32 bits of vB's doubleword.
 */
static void trans_vmrgew(DisasContext *ctx)
{
    const int vrt = rD(ctx->opcode);
    const int vra = rA(ctx->opcode);
    const int vrb = rB(ctx->opcode);
    TCGv_i64 b_upper = tcg_temp_new_i64();
    TCGv_i64 dword = tcg_temp_new_i64();
    int i;

    /* Same sequence for both halves: high doubleword first, then low. */
    for (i = 0; i < 2; i++) {
        const bool high = (i == 0);

        get_avr64(dword, vrb, high);
        tcg_gen_shri_i64(b_upper, dword, 32);
        get_avr64(dword, vra, high);
        tcg_gen_deposit_i64(dword, dword, b_upper, 0, 32);
        set_avr64(vrt, dword, high);
    }

    tcg_temp_free_i64(b_upper);
    tcg_temp_free_i64(dword);
}

/*
 * vmrgow: for each doubleword of the result, the upper 32 bits come from
 * the lower 32 bits of vA's doubleword and the lower 32 bits come from
 * the lower 32 bits of vB's doubleword.
 */
static void trans_vmrgow(DisasContext *ctx)
{
    const int vrt = rD(ctx->opcode);
    const int vra = rA(ctx->opcode);
    const int vrb = rB(ctx->opcode);
    TCGv_i64 b_dword = tcg_temp_new_i64();
    TCGv_i64 a_dword = tcg_temp_new_i64();
    TCGv_i64 merged = tcg_temp_new_i64();
    int i;

    /* Same sequence for both halves: high doubleword first, then low. */
    for (i = 0; i < 2; i++) {
        const bool high = (i == 0);

        get_avr64(b_dword, vrb, high);
        get_avr64(a_dword, vra, high);
        tcg_gen_deposit_i64(merged, b_dword, a_dword, 32, 32);
        set_avr64(vrt, merged, high);
    }

    tcg_temp_free_i64(b_dword);
    tcg_temp_free_i64(a_dword);
    tcg_temp_free_i64(merged);
}

/*
 * lvsl VRT,RA,RB - Load Vector for Shift Left
 *
 * With EA = (rA|0) + (rB) and sh = EA & 0xf, VRT receives the sixteen
 * consecutive byte values sh, sh + 1, ..., sh + 15 (bytes sh:sh+15 of the
 * 32-byte sequence 0x00, 0x01, ..., 0x1F).
 */
static void trans_lvsl(DisasContext *ctx)
{
    const int vrt = rD(ctx->opcode);
    TCGv_i64 dword = tcg_temp_new_i64();
    TCGv_i64 sh_bytes = tcg_temp_new_i64();
    TCGv addr = tcg_temp_new();

    /* sh is the low four bits of the effective address. */
    gen_addr_reg_index(ctx, addr);
    tcg_gen_extu_tl_i64(sh_bytes, addr);
    tcg_gen_andi_i64(sh_bytes, sh_bytes, 0xfULL);

    /* Replicate sh into each of the eight bytes of the doubleword. */
    tcg_gen_muli_i64(sh_bytes, sh_bytes, 0x0101010101010101ULL);

    /* High doubleword: byte values sh .. sh + 7. */
    tcg_gen_addi_i64(dword, sh_bytes, 0x0001020304050607ULL);
    set_avr64(vrt, dword, true);

    /* Low doubleword: byte values sh + 8 .. sh + 15. */
    tcg_gen_addi_i64(dword, sh_bytes, 0x08090a0b0c0d0e0fULL);
    set_avr64(vrt, dword, false);

    tcg_temp_free_i64(dword);
    tcg_temp_free_i64(sh_bytes);
    tcg_temp_free(addr);
}

/*
 * lvsr VRT,RA,RB - Load Vector for Shift Right
 *
 * With EA = (rA|0) + (rB) and sh = EA & 0xf, VRT receives the sixteen
 * consecutive byte values 16 - sh, 17 - sh, ..., 31 - sh (bytes
 * (16-sh):(31-sh) of the 32-byte sequence 0x00, 0x01, ..., 0x1F).
 */
static void trans_lvsr(DisasContext *ctx)
{
    const int vrt = rD(ctx->opcode);
    TCGv_i64 dword = tcg_temp_new_i64();
    TCGv_i64 sh_bytes = tcg_temp_new_i64();
    TCGv addr = tcg_temp_new();

    /* sh is the low four bits of the effective address. */
    gen_addr_reg_index(ctx, addr);
    tcg_gen_extu_tl_i64(sh_bytes, addr);
    tcg_gen_andi_i64(sh_bytes, sh_bytes, 0xfULL);

    /* Replicate sh into each of the eight bytes of the doubleword. */
    tcg_gen_muli_i64(sh_bytes, sh_bytes, 0x0101010101010101ULL);

    /* High doubleword: byte values (16 - sh) .. (23 - sh). */
    tcg_gen_subfi_i64(dword, 0x1011121314151617ULL, sh_bytes);
    set_avr64(vrt, dword, true);

    /* Low doubleword: byte values (24 - sh) .. (31 - sh). */
    tcg_gen_subfi_i64(dword, 0x18191a1b1c1d1e1fULL, sh_bytes);
    set_avr64(vrt, dword, false);

    tcg_temp_free_i64(dword);
    tcg_temp_free_i64(sh_bytes);
    tcg_temp_free(addr);
}

/*
 * vsl VRT,VRA,VRB - Vector Shift Left
 *
 * Shifting left 128 bit value of vA by value specified in bits 125-127 of vB.
 * Lowest 3 bits in each byte element of register vB must be identical or
 * result is undefined.
 *
 * Only the low 3 bits of vB's low doubleword are read here, so the shift
 * count 'sh' is in the range 0..7.
 */
static void trans_vsl(DisasContext *ctx)
{
    int VT = rD(ctx->opcode);
    int VA = rA(ctx->opcode);
    int VB = rB(ctx->opcode);
    TCGv_i64 avr = tcg_temp_new_i64();
    TCGv_i64 sh = tcg_temp_new_i64();
    TCGv_i64 carry = tcg_temp_new_i64();
    TCGv_i64 tmp = tcg_temp_new_i64();

    /* Place bits 125-127 of vB in 'sh'. */
    get_avr64(avr, VB, false);
    tcg_gen_andi_i64(sh, avr, 0x07ULL);

    /*
     * Save highest 'sh' bits of lower doubleword element of vA in variable
     * 'carry' and perform shift on lower doubleword.
     *
     * carry = low >> (64 - sh), emitted as a fixed shift by 32 followed by
     * a variable shift by (32 - sh).  For sh == 0 this yields carry == 0
     * without ever using a shift count of 64 (presumably the reason for
     * the split -- TODO confirm against the TCG shift-count rules).
     */
    get_avr64(avr, VA, false);
    tcg_gen_subfi_i64(tmp, 32, sh);
    tcg_gen_shri_i64(carry, avr, 32);
    tcg_gen_shr_i64(carry, carry, tmp);
    tcg_gen_shl_i64(avr, avr, sh);
    set_avr64(VT, avr, false);

    /*
     * Perform shift on higher doubleword element of vA and replace lowest
     * 'sh' bits with 'carry'.
     */
    get_avr64(avr, VA, true);
    tcg_gen_shl_i64(avr, avr, sh);
    tcg_gen_or_i64(avr, avr, carry);
    set_avr64(VT, avr, true);

    tcg_temp_free_i64(avr);
    tcg_temp_free_i64(sh);
    tcg_temp_free_i64(carry);
    tcg_temp_free_i64(tmp);
}

/*
 * vsr VRT,VRA,VRB - Vector Shift Right
 *
 * Shifting right 128 bit value of vA by value specified in bits 125-127 of vB.
 * Lowest 3 bits in each byte element of register vB must be identical or
 * result is undefined.
 */
static void trans_vsr(DisasContext *ctx)
{
    int vrt = rD(ctx->opcode);
    int vra = rA(ctx->opcode);
    int vrb = rB(ctx->opcode);
    TCGv_i64 dword = tcg_temp_new_i64();
    TCGv_i64 count = tcg_temp_new_i64();
    TCGv_i64 spill = tcg_temp_new_i64();
    TCGv_i64 rest = tcg_temp_new_i64();

    /* The shift count is the low three bits of vB's lower doubleword. */
    get_avr64(dword, vrb, false);
    tcg_gen_andi_i64(count, dword, 0x07ULL);

    /*
     * Upper doubleword of vA: 'spill' collects the bottom 'count' bits that
     * fall off the right edge, moved to the top of a doubleword.  The
     * required left shift of (64 - count) is emitted as a fixed 32 followed
     * by a variable (32 - count), so a zero count produces a zero 'spill'.
     */
    get_avr64(dword, vra, true);
    tcg_gen_subfi_i64(rest, 32, count);
    tcg_gen_shli_i64(spill, dword, 32);
    tcg_gen_shl_i64(spill, spill, rest);
    tcg_gen_shr_i64(dword, dword, count);
    set_avr64(vrt, dword, true);

    /*
     * Lower doubleword of vA: shift it and fill the vacated high bits with
     * the bits spilled from the upper doubleword.
     */
    get_avr64(dword, vra, false);
    tcg_gen_shr_i64(dword, dword, count);
    tcg_gen_or_i64(dword, dword, spill);
    set_avr64(vrt, dword, false);

    tcg_temp_free_i64(dword);
    tcg_temp_free_i64(count);
    tcg_temp_free_i64(spill);
    tcg_temp_free_i64(rest);
}

/*
 * vgbbd VRT,VRB - Vector Gather Bits by Bytes by Doubleword
 *
 * All ith bits (i in range 1 to 8) of each byte of doubleword element in source
 * register are concatenated and placed into ith byte of appropriate doubleword
 * element in destination register.
 *
 * Following solution is done for both doubleword elements of source register
 * in parallel, in order to reduce the number of instructions needed(that's why
 * arrays are used):
 * First, both doubleword elements of source register vB are placed in
 * appropriate element of array avr. Bits are gathered in 2x8 iterations(2 for
 * loops). In first iteration bit 1 of byte 1, bit 2 of byte 2,... bit 8 of
 * byte 8 are in their final spots so avr[i], i={0,1} can be and-ed with
 * tcg_mask. For every following iteration, both avr[i] and tcg_mask variables
 * have to be shifted right for 7 and 8 places, respectively, in order to get
 * bit 1 of byte 2, bit 2 of byte 3.. bit 7 of byte 8 in their final spots so
 * shifted avr values(saved in tmp) can be and-ed with new value of tcg_mask...
 * After the first 8 iterations (first loop), all the first bits are in their
 * final places, all second bits but the second bit from the eighth byte are in
 * their places... (only one eighth bit, the one from the eighth byte, is in its
 * place). In the second loop we do all
 * operations symmetrically, in order to get other half of bits in their final
 * spots. Results for first and second doubleword elements are saved in
 * result[0] and result[1] respectively. In the end those results are saved in
 * appropriate doubleword element of destination register vD.
 */
static void trans_vgbbd(DisasContext *ctx)
{
    int VT = rD(ctx->opcode);
    int VB = rB(ctx->opcode);
    TCGv_i64 tmp = tcg_temp_new_i64();
    /* One bit per byte, bit k of byte k: the bits that are already in place. */
    uint64_t mask = 0x8040201008040201ULL;
    int i, j;

    /* Index 0/1 of both arrays is the 'high' argument of get/set_avr64. */
    TCGv_i64 result[2];
    result[0] = tcg_temp_new_i64();
    result[1] = tcg_temp_new_i64();
    TCGv_i64 avr[2];
    avr[0] = tcg_temp_new_i64();
    avr[1] = tcg_temp_new_i64();
    TCGv_i64 tcg_mask = tcg_temp_new_i64();

    /* Step 0: load both source doublewords and keep the in-place bits. */
    tcg_gen_movi_i64(tcg_mask, mask);
    for (j = 0; j < 2; j++) {
        get_avr64(avr[j], VB, j);
        tcg_gen_and_i64(result[j], avr[j], tcg_mask);
    }
    /*
     * First half: source shifted right by 7 * i, mask shifted right by
     * 8 * i (the mask is shifted on the host, at translation time).
     */
    for (i = 1; i < 8; i++) {
        tcg_gen_movi_i64(tcg_mask, mask >> (i * 8));
        for (j = 0; j < 2; j++) {
            tcg_gen_shri_i64(tmp, avr[j], i * 7);
            tcg_gen_and_i64(tmp, tmp, tcg_mask);
            tcg_gen_or_i64(result[j], result[j], tmp);
        }
    }
    /* Second half: the mirror image, using left shifts. */
    for (i = 1; i < 8; i++) {
        tcg_gen_movi_i64(tcg_mask, mask << (i * 8));
        for (j = 0; j < 2; j++) {
            tcg_gen_shli_i64(tmp, avr[j], i * 7);
            tcg_gen_and_i64(tmp, tmp, tcg_mask);
            tcg_gen_or_i64(result[j], result[j], tmp);
        }
    }
    /* Write both gathered doublewords to the destination register. */
    for (j = 0; j < 2; j++) {
        set_avr64(VT, result[j], j);
    }

    tcg_temp_free_i64(tmp);
    tcg_temp_free_i64(tcg_mask);
    tcg_temp_free_i64(result[0]);
    tcg_temp_free_i64(result[1]);
    tcg_temp_free_i64(avr[0]);
    tcg_temp_free_i64(avr[1]);
}

/*
 * vclzw VRT,VRB - Vector Count Leading Zeros Word
 *
 * Counting the number of leading zero bits of each word element in source
 * register and placing result in appropriate word element of destination
 * register.
 */
static void trans_vclzw(DisasContext *ctx)
{
    int dst = rD(ctx->opcode);
    int src = rB(ctx->opcode);
    TCGv_i32 val = tcg_temp_new_i32();
    int word;

    /*
     * Handle the four 32-bit words one at a time: load the word straight
     * from the source register's storage in CPUPPCState, count its leading
     * zeros (an all-zero word yields 32), and store the count at the same
     * position in the destination register.
     */
    for (word = 0; word < 4; word++) {
        int delta = word * 4;

        tcg_gen_ld_i32(val, cpu_env,
            offsetof(CPUPPCState, vsr[32 + src].u64[0]) + delta);
        tcg_gen_clzi_i32(val, val, 32);
        tcg_gen_st_i32(val, cpu_env,
            offsetof(CPUPPCState, vsr[32 + dst].u64[0]) + delta);
    }

    tcg_temp_free_i32(val);
}

/*
 * vclzd VRT,VRB - Vector Count Leading Zeros Doubleword
 *
 * Counting the number of leading zero bits of each doubleword element in source
 * register and placing result in appropriate doubleword element of destination
 * register.
 */
static void trans_vclzd(DisasContext *ctx)
{
    int dst = rD(ctx->opcode);
    int src = rB(ctx->opcode);
    TCGv_i64 dword = tcg_temp_new_i64();
    int pass;

    /*
     * Two passes over the same temporary: the high doubleword first, then
     * the low one.  An all-zero doubleword yields a count of 64.
     */
    for (pass = 0; pass < 2; pass++) {
        bool high = (pass == 0);

        get_avr64(dword, src, high);
        tcg_gen_clzi_i64(dword, dword, 64);
        set_avr64(dst, dword, high);
    }

    tcg_temp_free_i64(dword);
}

/*
 * Instantiate gen_<name>() translators for further VX-form instructions.
 * NOTE(review): GEN_VXFORM_V and GEN_VXFORM are defined outside this part of
 * the file; the trailing numbers are presumably the opc2/opc3 opcode fields,
 * as in the other GEN_* macros here - confirm against their definitions.
 */
GEN_VXFORM_V(vmuluwm, MO_32, tcg_gen_gvec_mul, 4, 2);
GEN_VXFORM(vsrv, 2, 28);
GEN_VXFORM(vslv, 2, 29);
GEN_VXFORM(vslo, 6, 16);
GEN_VXFORM(vsro, 6, 17);
GEN_VXFORM(vaddcuw, 0, 6);
GEN_VXFORM(vsubcuw, 0, 22);

/*
 * Generic translator for three-operand VX-form instructions that map directly
 * onto a TCG gvec expander.
 *
 * @ctx:      translation context
 * @a:        decoded VX-form arguments (vrt, vra, vrb)
 * @vece:     element size (MO_8 .. MO_64) handed to the expander
 * @gen_gvec: expander taking (vece, dofs, aofs, bofs, oprsz, maxsz)
 *
 * The whole 16-byte register is operated on (oprsz == maxsz == 16).
 * Returns true once code has been emitted; REQUIRE_VECTOR() may return
 * earlier on its own.
 */
static bool do_vector_gvec3_VX(DisasContext *ctx, arg_VX *a, int vece,
                               void (*gen_gvec)(unsigned, uint32_t, uint32_t,
                                                uint32_t, uint32_t, uint32_t))
{
    uint32_t dst_ofs, lhs_ofs, rhs_ofs;

    REQUIRE_VECTOR(ctx);

    dst_ofs = avr_full_offset(a->vrt);
    lhs_ofs = avr_full_offset(a->vra);
    rhs_ofs = avr_full_offset(a->vrb);

    gen_gvec(vece, dst_ofs, lhs_ofs, rhs_ofs, 16, 16);

    return true;
}

/*
 * Per-element shifts and rotates by a per-element amount, all routed through
 * do_vector_gvec3_VX() with the matching gvec expander.  The byte, halfword
 * and word forms are gated on the ALTIVEC flag, the doubleword forms on
 * ALTIVEC_207.
 */

/* Shift left: VSLB/VSLH/VSLW/VSLD */
TRANS_FLAGS(ALTIVEC, VSLB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_shlv);
TRANS_FLAGS(ALTIVEC, VSLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_shlv);
TRANS_FLAGS(ALTIVEC, VSLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_shlv);
TRANS_FLAGS2(ALTIVEC_207, VSLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_shlv);

/* Logical shift right: VSRB/VSRH/VSRW/VSRD */
TRANS_FLAGS(ALTIVEC, VSRB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_shrv);
TRANS_FLAGS(ALTIVEC, VSRH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_shrv);
TRANS_FLAGS(ALTIVEC, VSRW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_shrv);
TRANS_FLAGS2(ALTIVEC_207, VSRD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_shrv);

/* Arithmetic shift right: VSRAB/VSRAH/VSRAW/VSRAD */
TRANS_FLAGS(ALTIVEC, VSRAB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_sarv);
TRANS_FLAGS(ALTIVEC, VSRAH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_sarv);
TRANS_FLAGS(ALTIVEC, VSRAW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_sarv);
TRANS_FLAGS2(ALTIVEC_207, VSRAD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_sarv);

/* Rotate left: VRLB/VRLH/VRLW/VRLD */
TRANS_FLAGS(ALTIVEC, VRLB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_rotlv)
TRANS_FLAGS(ALTIVEC, VRLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_rotlv)
TRANS_FLAGS(ALTIVEC, VRLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_rotlv)
TRANS_FLAGS2(ALTIVEC_207, VRLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_rotlv)

/*
 * Build the per-element bit mask used by the rotate-then-mask/insert vector
 * expanders (gen_vrlnm_vec, gen_vrlmi_vec).
 *
 * For every element of @vrb, a begin value b is taken from the field that
 * starts at bit 16 and an end value e from the field that starts at bit 8,
 * each reduced to (8 << vece) - 1, i.e. to the element width minus one.
 * The mask is (~0 >> b) ^ ((~0 >> e) >> 1), inverted for elements where
 * b > e.
 *
 * Returns a newly allocated vector temporary holding the mask; the caller
 * is responsible for freeing it.
 */
static TCGv_vec do_vrl_mask_vec(unsigned vece, TCGv_vec vrb)
{
    TCGv_vec t0 = tcg_temp_new_vec_matching(vrb),
             t1 = tcg_temp_new_vec_matching(vrb),
             t2 = tcg_temp_new_vec_matching(vrb),
             ones = tcg_constant_vec_matching(vrb, vece, -1);

    /* Extract b and e */
    /* t2 first serves as the field mask for both extractions ... */
    tcg_gen_dupi_vec(vece, t2, (8 << vece) - 1);

    /* t0 = b */
    tcg_gen_shri_vec(vece, t0, vrb, 16);
    tcg_gen_and_vec(vece, t0, t0, t2);

    /* t1 = e */
    tcg_gen_shri_vec(vece, t1, vrb, 8);
    tcg_gen_and_vec(vece, t1, t1, t2);

    /* Compare b and e to negate the mask where begin > end */
    /* ... and is then overwritten with the b > e comparison result. */
    tcg_gen_cmp_vec(TCG_COND_GT, vece, t2, t0, t1);

    /* Create the mask with (~0 >> b) ^ ((~0 >> e) >> 1) */
    tcg_gen_shrv_vec(vece, t0, ones, t0);
    tcg_gen_shrv_vec(vece, t1, ones, t1);
    tcg_gen_shri_vec(vece, t1, t1, 1);
    tcg_gen_xor_vec(vece, t0, t0, t1);

    /* negate the mask */
    /* XOR with the comparison result flips only the elements where b > e. */
    tcg_gen_xor_vec(vece, t0, t0, t2);

    tcg_temp_free_vec(t1);
    tcg_temp_free_vec(t2);

    /* t0 is handed to the caller, which frees it. */
    return t0;
}

/*
 * Inline vector expansion of "rotate left then AND with mask": every element
 * of @vra is rotated left by the low bits of the matching element of @vrb and
 * the result is ANDed with the mask that do_vrl_mask_vec() derives from @vrb.
 */
static void gen_vrlnm_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
                          TCGv_vec vrb)
{
    TCGv_vec keep, rot = tcg_temp_new_vec_matching(vrt);

    /* Mask of the bits that survive the AND. */
    keep = do_vrl_mask_vec(vece, vrb);

    /* Rotate amount: vrb reduced to the element width minus one. */
    tcg_gen_dupi_vec(vece, rot, (8 << vece) - 1);
    tcg_gen_and_vec(vece, rot, vrb, rot);

    /* vrt = rotl(vra, rot) & keep */
    tcg_gen_rotlv_vec(vece, vrt, vra, rot);
    tcg_gen_and_vec(vece, vrt, vrt, keep);

    tcg_temp_free_vec(rot);
    tcg_temp_free_vec(keep);
}

/*
 * Translate VRLWNM / VRLDNM (rotate left then AND with mask).
 *
 * @vece is MO_32 or MO_64 (see the TRANS() users of this function) and
 * selects the descriptor through ops[vece - 2].  Each descriptor pairs the
 * inline vector expander with the out-of-line helper of the same element
 * size.
 *
 * NOTE(review): this is gated with REQUIRE_VSX() while the other translators
 * in this part of the file use REQUIRE_VECTOR() - confirm that VSX is the
 * intended facility check.
 */
static bool do_vrlnm(DisasContext *ctx, arg_VX *a, int vece)
{
    /* Vector opcodes listed for the inline expansion. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
    };
    static const GVecGen3 ops[2] = {
        {
            .fniv = gen_vrlnm_vec,
            .fno = gen_helper_VRLWNM,
            .opt_opc = vecop_list,
            .load_dest = true,
            .vece = MO_32
        },
        {
            .fniv = gen_vrlnm_vec,
            .fno = gen_helper_VRLDNM,
            .opt_opc = vecop_list,
            .load_dest = true,
            .vece = MO_64
        }
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VSX(ctx);

    /* Operate on the full 16-byte registers. */
    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
                   avr_full_offset(a->vrb), 16, 16, &ops[vece - 2]);

    return true;
}

/* Word and doubleword forms of rotate-left-then-AND-with-mask. */
TRANS(VRLWNM, do_vrlnm, MO_32)
TRANS(VRLDNM, do_vrlnm, MO_64)

/*
 * Inline vector expansion of "rotate left then mask insert": every element of
 * @vra is rotated left by the low bits of the matching element of @vrb, and
 * the rotated bits selected by the mask from do_vrl_mask_vec() replace the
 * corresponding bits of @vrt.  The remaining bits of @vrt are kept, so @vrt
 * is read as well as written.
 */
static void gen_vrlmi_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
                          TCGv_vec vrb)
{
    TCGv_vec sel, rot = tcg_temp_new_vec_matching(vrt),
             rotated = tcg_temp_new_vec_matching(vrt);

    /* Mask of the bits to be replaced in vrt. */
    sel = do_vrl_mask_vec(vece, vrb);

    /* Rotate amount: vrb reduced to the element width minus one. */
    tcg_gen_dupi_vec(vece, rot, (8 << vece) - 1);
    tcg_gen_and_vec(vece, rot, vrb, rot);

    /* vrt = (rotl(vra, rot) & sel) | (vrt & ~sel) */
    tcg_gen_rotlv_vec(vece, rotated, vra, rot);
    tcg_gen_bitsel_vec(vece, vrt, sel, rotated, vrt);

    tcg_temp_free_vec(rot);
    tcg_temp_free_vec(rotated);
    tcg_temp_free_vec(sel);
}

/*
 * Translate VRLWMI / VRLDMI (rotate left then mask insert).
 *
 * @vece is MO_32 or MO_64 (see the TRANS() users of this function) and
 * selects the descriptor through ops[vece - 2].  Each descriptor pairs the
 * inline vector expander gen_vrlmi_vec() with the out-of-line helper of the
 * same element size; both must implement the *insert* variant so that the
 * result does not depend on which path the host ends up using.
 *
 * NOTE(review): this is gated with REQUIRE_VSX() while the other translators
 * in this part of the file use REQUIRE_VECTOR() - confirm that VSX is the
 * intended facility check.
 */
static bool do_vrlmi(DisasContext *ctx, arg_VX *a, int vece)
{
    /* Vector opcodes listed for the inline expansion. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
    };
    static const GVecGen3 ops[2] = {
        {
            .fniv = gen_vrlmi_vec,
            .fno = gen_helper_VRLWMI,
            .opt_opc = vecop_list,
            /* The insert keeps part of vrt, so it is also an input. */
            .load_dest = true,
            .vece = MO_32
        },
        {
            /*
             * Must be the mask-insert expander, matching gen_helper_VRLDMI;
             * gen_vrlnm_vec would AND with the mask and discard the old
             * contents of vrt.
             */
            .fniv = gen_vrlmi_vec,
            .fno = gen_helper_VRLDMI,
            .opt_opc = vecop_list,
            .load_dest = true,
            .vece = MO_64
        }
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VSX(ctx);

    /* Operate on the full 16-byte registers. */
    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
                   avr_full_offset(a->vrb), 16, 16, &ops[vece - 2]);

    return true;
}

/* Word and doubleword forms of rotate-left-then-mask-insert. */
TRANS(VRLWMI, do_vrlmi, MO_32)
TRANS(VRLDMI, do_vrlmi, MO_64)

/*
 * Shift the 128-bit value in vra by a 7-bit count taken from the low bits of
 * the high doubleword of vrb, and store the result in vrt.
 *
 * @right: true for a right shift, false for a left shift
 * @alg:   for right shifts, true fills with copies of the sign bit
 *         (arithmetic), false fills with zeros (logical)
 *
 * The count is applied in two stages: bit 6 (value 64) moves a whole
 * doubleword, then the low six bits shift the pair by 0..63 bits.
 */
static bool do_vector_shift_quad(DisasContext *ctx, arg_VX *a, bool right,
                                 bool alg)
{
    TCGv_i64 hi, lo, t0, t1, n, zero = tcg_constant_i64(0);

    REQUIRE_VECTOR(ctx);

    n = tcg_temp_new_i64();
    hi = tcg_temp_new_i64();
    lo = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();
    /* t1 is the fill doubleword: zero unless overwritten below for 'alg'. */
    t1 = tcg_const_i64(0);

    get_avr64(lo, a->vra, false);
    get_avr64(hi, a->vra, true);

    get_avr64(n, a->vrb, true);

    /* Stage 1: if bit 6 of the count is set, move by one whole doubleword. */
    tcg_gen_andi_i64(t0, n, 64);
    if (right) {
        /* lo takes the old hi; hi becomes the fill value. */
        tcg_gen_movcond_i64(TCG_COND_NE, lo, t0, zero, hi, lo);
        if (alg) {
            /*
             * Sign fill.  It is only selected when bit 6 is set, in which
             * case lo already holds the original high doubleword.
             */
            tcg_gen_sari_i64(t1, lo, 63);
        }
        tcg_gen_movcond_i64(TCG_COND_NE, hi, t0, zero, t1, hi);
    } else {
        /* hi takes the old lo; lo becomes zero. */
        tcg_gen_movcond_i64(TCG_COND_NE, hi, t0, zero, lo, hi);
        tcg_gen_movcond_i64(TCG_COND_NE, lo, t0, zero, zero, lo);
    }
    /* Stage 2: remaining shift of 0..63 bits. */
    tcg_gen_andi_i64(n, n, 0x3F);

    /*
     * The doubleword at the far end of the shift (hi for right, lo for left)
     * receives no bits from its neighbour, so it can be written immediately.
     */
    if (right) {
        if (alg) {
            tcg_gen_sar_i64(t0, hi, n);
        } else {
            tcg_gen_shr_i64(t0, hi, n);
        }
    } else {
        tcg_gen_shl_i64(t0, lo, n);
    }
    set_avr64(a->vrt, t0, right);

    /* The other doubleword: its own shifted bits ... */
    if (right) {
        tcg_gen_shr_i64(lo, lo, n);
    } else {
        tcg_gen_shl_i64(hi, hi, n);
    }
    /*
     * ... combined with the bits carried over from the neighbour.  The
     * neighbour must be shifted the opposite way by (64 - n); this is emitted
     * as a shift by (n ^ 63) == (63 - n) followed by a shift by 1, so that
     * n == 0 never results in a single shift by 64.
     */
    tcg_gen_xori_i64(n, n, 63);
    if (right) {
        tcg_gen_shl_i64(hi, hi, n);
        tcg_gen_shli_i64(hi, hi, 1);
    } else {
        tcg_gen_shr_i64(lo, lo, n);
        tcg_gen_shri_i64(lo, lo, 1);
    }
    tcg_gen_or_i64(hi, hi, lo);
    set_avr64(a->vrt, hi, !right);

    tcg_temp_free_i64(hi);
    tcg_temp_free_i64(lo);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(n);

    return true;
}

/* Quadword shifts; the trailing arguments are (right, alg). */
TRANS_FLAGS2(ISA310, VSLQ, do_vector_shift_quad, false, false);
TRANS_FLAGS2(ISA310, VSRQ, do_vector_shift_quad, true, false);
TRANS_FLAGS2(ISA310, VSRAQ, do_vector_shift_quad, true, true);

/*
 * Build the 128-bit mask used by the quadword rotate-and-mask/insert forms.
 *
 * @mh, @ml: outputs, high and low doubleword of the mask
 * @b, @e:   inputs, begin and end positions (the caller extracts them as
 *           7-bit fields)
 *
 * The mask is (~0 >> b) ^ ((~0 >> e) >> 1) computed over 128 bits, inverted
 * when b > e.  Each 128-bit right shift of all-ones is split into a
 * whole-doubleword step (bit 6 of the count) and a 0..63 bit step.
 */
static void do_vrlq_mask(TCGv_i64 mh, TCGv_i64 ml, TCGv_i64 b, TCGv_i64 e)
{
    TCGv_i64 th, tl, t0, t1, zero = tcg_constant_i64(0),
             ones = tcg_constant_i64(-1);

    th = tcg_temp_new_i64();
    tl = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    /* m = ~0 >> b */
    /* t1 = high doubleword after the whole-doubleword step: 0 if b >= 64. */
    tcg_gen_andi_i64(t0, b, 64);
    tcg_gen_movcond_i64(TCG_COND_NE, t1, t0, zero, zero, ones);
    tcg_gen_andi_i64(t0, b, 0x3F);
    tcg_gen_shr_i64(mh, t1, t0);
    tcg_gen_shr_i64(ml, ones, t0);
    /*
     * Bits of the high doubleword that cross into the low one: a left shift
     * by 64 - (b & 63), emitted as ((b & 63) ^ 63) followed by 1.
     */
    tcg_gen_xori_i64(t0, t0, 63);
    tcg_gen_shl_i64(t1, t1, t0);
    tcg_gen_shli_i64(t1, t1, 1);
    tcg_gen_or_i64(ml, t1, ml);

    /* t = ~0 >> e */
    /* Same sequence as above, with e as the count and th:tl as the result. */
    tcg_gen_andi_i64(t0, e, 64);
    tcg_gen_movcond_i64(TCG_COND_NE, t1, t0, zero, zero, ones);
    tcg_gen_andi_i64(t0, e, 0x3F);
    tcg_gen_shr_i64(th, t1, t0);
    tcg_gen_shr_i64(tl, ones, t0);
    tcg_gen_xori_i64(t0, t0, 63);
    tcg_gen_shl_i64(t1, t1, t0);
    tcg_gen_shli_i64(t1, t1, 1);
    tcg_gen_or_i64(tl, t1, tl);

    /* t = t >> 1 */
    /* The low doubleword takes its top bit from the high doubleword. */
    tcg_gen_extract2_i64(tl, tl, th, 1);
    tcg_gen_shri_i64(th, th, 1);

    /* m = m ^ t */
    tcg_gen_xor_i64(mh, mh, th);
    tcg_gen_xor_i64(ml, ml, tl);

    /* Negate the mask if begin > end */
    /* t0 = all-ones when b > e, else zero; XOR then inverts or is a no-op. */
    tcg_gen_movcond_i64(TCG_COND_GT, t0, b, e, ones, zero);

    tcg_gen_xor_i64(mh, mh, t0);
    tcg_gen_xor_i64(ml, ml, t0);

    tcg_temp_free_i64(th);
    tcg_temp_free_i64(tl);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
}

/*
 * Rotate the 128-bit value in vra left by a 7-bit count taken from the low
 * bits of the high doubleword of vrb, optionally apply a mask, and store the
 * result in vrt.
 *
 * @mask:   AND the rotated value with the mask built by do_vrlq_mask()
 * @insert: additionally keep the bits of the old vrt that lie outside the
 *          mask (rotate-then-mask-insert)
 *
 * The mask is built and applied when either flag is set.
 */
static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask,
                                bool insert)
{
    TCGv_i64 ah, al, vrb, n, t0, t1, zero = tcg_constant_i64(0);

    REQUIRE_VECTOR(ctx);
    REQUIRE_INSNS_FLAGS2(ctx, ISA310);

    ah = tcg_temp_new_i64();
    al = tcg_temp_new_i64();
    vrb = tcg_temp_new_i64();
    n = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    get_avr64(ah, a->vra, true);
    get_avr64(al, a->vra, false);
    get_avr64(vrb, a->vrb, true);

    /* If bit 6 of the count is set, rotate by 64: swap the doublewords. */
    tcg_gen_mov_i64(t0, ah);
    tcg_gen_andi_i64(t1, vrb, 64);
    tcg_gen_movcond_i64(TCG_COND_NE, ah, t1, zero, al, ah);
    tcg_gen_movcond_i64(TCG_COND_NE, al, t1, zero, t0, al);
    /* Remaining rotate amount, 0..63. */
    tcg_gen_andi_i64(n, vrb, 0x3F);

    /* t0/t1 accumulate the high/low doubleword of the result. */
    tcg_gen_shl_i64(t0, ah, n);
    tcg_gen_shl_i64(t1, al, n);

    /*
     * Bits that wrap into the other doubleword need a right shift by
     * (64 - n), emitted as (n ^ 63) followed by 1 so that n == 0 never
     * results in a single shift by 64.
     */
    tcg_gen_xori_i64(n, n, 63);

    tcg_gen_shr_i64(al, al, n);
    tcg_gen_shri_i64(al, al, 1);
    tcg_gen_or_i64(t0, al, t0);

    tcg_gen_shr_i64(ah, ah, n);
    tcg_gen_shri_i64(ah, ah, 1);
    tcg_gen_or_i64(t1, ah, t1);

    if (mask || insert) {
        /*
         * n and vrb are reused as do_vrlq_mask()'s 'e' and 'b' arguments:
         * the 7-bit fields at bit 8 and bit 16 of vrb's high doubleword.
         */
        tcg_gen_extract_i64(n, vrb, 8, 7);
        tcg_gen_extract_i64(vrb, vrb, 16, 7);

        /* ah/al are reused to hold the mask. */
        do_vrlq_mask(ah, al, vrb, n);

        tcg_gen_and_i64(t0, t0, ah);
        tcg_gen_and_i64(t1, t1, al);

        if (insert) {
            /* n/vrb are reused once more for the old destination value. */
            get_avr64(n, a->vrt, true);
            get_avr64(vrb, a->vrt, false);
            tcg_gen_andc_i64(n, n, ah);
            tcg_gen_andc_i64(vrb, vrb, al);
            tcg_gen_or_i64(t0, t0, n);
            tcg_gen_or_i64(t1, t1, vrb);
        }
    }

    set_avr64(a->vrt, t0, true);
    set_avr64(a->vrt, t1, false);

    tcg_temp_free_i64(ah);
    tcg_temp_free_i64(al);
    tcg_temp_free_i64(vrb);
    tcg_temp_free_i64(n);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);

    return true;
}

/* Quadword rotates; the trailing arguments are (mask, insert). */
TRANS(VRLQ, do_vector_rotl_quad, false, false)
TRANS(VRLQNM, do_vector_rotl_quad, true, false)
TRANS(VRLQMI, do_vector_rotl_quad, false, true)

/*
 * GEN_VXFORM_SAT - define the translator for a saturating add/subtract.
 *
 * Expands to two functions:
 *  - gen_<NAME>_vec(): inline vector expansion.  It computes both the
 *    wrapping result (tcg_gen_<NORM>_vec) and the saturating result
 *    (tcg_gen_<SAT>_vec, written to the destination), compares the two, and
 *    ORs the "not equal" lanes into 'sat'.
 *  - gen_<NAME>(): raises POWERPC_EXCP_VPU when Altivec is disabled,
 *    otherwise expands over the full 16-byte registers with
 *    tcg_gen_gvec_4(), passing the vscr_sat field of CPUPPCState as the
 *    operand that is both read and written (.write_aofs).
 *
 * OPC2 and OPC3 are not used by the expansion.
 */
#define GEN_VXFORM_SAT(NAME, VECE, NORM, SAT, OPC2, OPC3)               \
static void glue(glue(gen_, NAME), _vec)(unsigned vece, TCGv_vec t,     \
                                         TCGv_vec sat, TCGv_vec a,      \
                                         TCGv_vec b)                    \
{                                                                       \
    TCGv_vec x = tcg_temp_new_vec_matching(t);                          \
    glue(glue(tcg_gen_, NORM), _vec)(VECE, x, a, b);                    \
    glue(glue(tcg_gen_, SAT), _vec)(VECE, t, a, b);                     \
    tcg_gen_cmp_vec(TCG_COND_NE, VECE, x, x, t);                        \
    tcg_gen_or_vec(VECE, sat, sat, x);                                  \
    tcg_temp_free_vec(x);                                               \
}                                                                       \
static void glue(gen_, NAME)(DisasContext *ctx)                         \
{                                                                       \
    static const TCGOpcode vecop_list[] = {                             \
        glue(glue(INDEX_op_, NORM), _vec),                              \
        glue(glue(INDEX_op_, SAT), _vec),                               \
        INDEX_op_cmp_vec, 0                                             \
    };                                                                  \
    static const GVecGen4 g = {                                         \
        .fniv = glue(glue(gen_, NAME), _vec),                           \
        .fno = glue(gen_helper_, NAME),                                 \
        .opt_opc = vecop_list,                                          \
        .write_aofs = true,                                             \
        .vece = VECE,                                                   \
    };                                                                  \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    tcg_gen_gvec_4(avr_full_offset(rD(ctx->opcode)),                    \
                   offsetof(CPUPPCState, vscr_sat),                     \
                   avr_full_offset(rA(ctx->opcode)),                    \
                   avr_full_offset(rB(ctx->opcode)),                    \
                   16, 16, &g);                                         \
}

/*
 * Instantiate the gen_<name>() translators for the saturating and quadword
 * add/subtract, shift, pack, sum, floating-point and extract instructions.
 * NOTE(review): apart from GEN_VXFORM_SAT the macros used here are defined
 * outside this part of the file; the *_DUAL variants presumably combine two
 * instructions that share an encoding slot and are told apart by CPU feature
 * flags - confirm against the macro definitions.
 */
GEN_VXFORM_SAT(vaddubs, MO_8, add, usadd, 0, 8);
GEN_VXFORM_DUAL_EXT(vaddubs, PPC_ALTIVEC, PPC_NONE, 0,       \
                    vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800)
GEN_VXFORM_SAT(vadduhs, MO_16, add, usadd, 0, 9);
GEN_VXFORM_DUAL(vadduhs, PPC_ALTIVEC, PPC_NONE, \
                vmul10euq, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_SAT(vadduws, MO_32, add, usadd, 0, 10);
GEN_VXFORM_SAT(vaddsbs, MO_8, add, ssadd, 0, 12);
GEN_VXFORM_SAT(vaddshs, MO_16, add, ssadd, 0, 13);
GEN_VXFORM_SAT(vaddsws, MO_32, add, ssadd, 0, 14);
GEN_VXFORM_SAT(vsububs, MO_8, sub, ussub, 0, 24);
GEN_VXFORM_SAT(vsubuhs, MO_16, sub, ussub, 0, 25);
GEN_VXFORM_SAT(vsubuws, MO_32, sub, ussub, 0, 26);
GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28);
GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29);
GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30);
GEN_VXFORM(vadduqm, 0, 4);
GEN_VXFORM(vaddcuq, 0, 5);
GEN_VXFORM3(vaddeuqm, 30, 0);
GEN_VXFORM3(vaddecuq, 30, 0);
GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
            vaddecuq, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM(vsubuqm, 0, 20);
GEN_VXFORM(vsubcuq, 0, 21);
GEN_VXFORM3(vsubeuqm, 31, 0);
GEN_VXFORM3(vsubecuq, 31, 0);
GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
            vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
/* vsl/vsr/lvsl/lvsr below are implemented by the trans_<name>() functions. */
GEN_VXFORM_TRANS(vsl, 2, 7);
GEN_VXFORM_TRANS(vsr, 2, 11);
GEN_VXFORM_ENV(vpkuhum, 7, 0);
GEN_VXFORM_ENV(vpkuwum, 7, 1);
GEN_VXFORM_ENV(vpkudum, 7, 17);
GEN_VXFORM_ENV(vpkuhus, 7, 2);
GEN_VXFORM_ENV(vpkuwus, 7, 3);
GEN_VXFORM_ENV(vpkudus, 7, 19);
GEN_VXFORM_ENV(vpkshus, 7, 4);
GEN_VXFORM_ENV(vpkswus, 7, 5);
GEN_VXFORM_ENV(vpksdus, 7, 21);
GEN_VXFORM_ENV(vpkshss, 7, 6);
GEN_VXFORM_ENV(vpkswss, 7, 7);
GEN_VXFORM_ENV(vpksdss, 7, 23);
GEN_VXFORM(vpkpx, 7, 12);
GEN_VXFORM_ENV(vsum4ubs, 4, 24);
GEN_VXFORM_ENV(vsum4sbs, 4, 28);
GEN_VXFORM_ENV(vsum4shs, 4, 25);
GEN_VXFORM_ENV(vsum2sws, 4, 26);
GEN_VXFORM_ENV(vsumsws, 4, 30);
GEN_VXFORM_ENV(vaddfp, 5, 0);
GEN_VXFORM_ENV(vsubfp, 5, 1);
GEN_VXFORM_ENV(vmaxfp, 5, 16);
GEN_VXFORM_ENV(vminfp, 5, 17);
GEN_VXFORM_HETRO(vextublx, 6, 24)
GEN_VXFORM_HETRO(vextuhlx, 6, 25)
GEN_VXFORM_HETRO(vextuwlx, 6, 26)
GEN_VXFORM_TRANS_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
                vextuwlx, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_HETRO(vextubrx, 6, 28)
GEN_VXFORM_HETRO(vextuhrx, 6, 29)
GEN_VXFORM_HETRO(vextuwrx, 6, 30)
GEN_VXFORM_TRANS(lvsl, 6, 31)
GEN_VXFORM_TRANS(lvsr, 6, 32)
GEN_VXFORM_TRANS_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207,
                vextuwrx, PPC_NONE, PPC2_ISA300)

/*
 * GEN_VXRFORM1 - define gen_<name>() as a call to gen_helper_<opname>().
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise it passes cpu_env and pointers to the rD, rA and rB vector
 * registers to the helper and frees the pointer temporaries afterwards.
 * str, opc2 and opc3 are not used by the expansion.
 */
#define GEN_VXRFORM1(opname, name, str, opc2, opc3)                     \
static void glue(gen_, name)(DisasContext *ctx)                         \
    {                                                                   \
        TCGv_ptr ra, rb, rd;                                            \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        ra = gen_avr_ptr(rA(ctx->opcode));                              \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        rd = gen_avr_ptr(rD(ctx->opcode));                              \
        gen_helper_##opname(cpu_env, rd, ra, rb);                       \
        tcg_temp_free_ptr(ra);                                          \
        tcg_temp_free_ptr(rb);                                          \
        tcg_temp_free_ptr(rd);                                          \
    }

/*
 * GEN_VXRFORM - define both variants of a VXR-form instruction:
 * gen_<name>() calling gen_helper_<name>(), and gen_<name>_() (trailing
 * underscore) calling gen_helper_<name>_dot() for the record ("<name>.")
 * form, whose opc3 additionally has bit 4 set.
 */
#define GEN_VXRFORM(name, opc2, opc3)                                \
    GEN_VXRFORM1(name, name, #name, opc2, opc3)                      \
    GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4)))

/*
 * Support for Altivec instructions that use bit 31 (Rc) as an opcode
 * bit but also use bit 21 as an actual Rc bit.  In general, these pairs
 * come from different versions of the ISA, so we must also support a
 * pair of flags for each instruction.
 *
 * The generated gen_<name0>_<name1>() dispatches as follows:
 *  - Rc == 0 and name0 enabled by flg0/flg2_0: gen_<name0>() when Rc21 is
 *    clear, gen_<name0>_() (the record form) when it is set;
 *  - Rc == 1 and name1 enabled by flg1/flg2_1: gen_<name1>() or
 *    gen_<name1>_() likewise;
 *  - anything else raises an invalid-instruction exception.
 */
#define GEN_VXRFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)     \
static void glue(gen_, name0##_##name1)(DisasContext *ctx)             \
{                                                                      \
    if ((Rc(ctx->opcode) == 0) &&                                      \
        ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \
        if (Rc21(ctx->opcode) == 0) {                                  \
            gen_##name0(ctx);                                          \
        } else {                                                       \
            gen_##name0##_(ctx);                                       \
        }                                                              \
    } else if ((Rc(ctx->opcode) == 1) &&                               \
        ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \
        if (Rc21(ctx->opcode) == 0) {                                  \
            gen_##name1(ctx);                                          \
        } else {                                                       \
            gen_##name1##_(ctx);                                       \
        }                                                              \
    } else {                                                           \
        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);            \
    }                                                                  \
}

/*
 * Derive CR field 6 from the vector compare result held in register @vrt:
 * value 8 (bit 3) when every bit of the 128-bit result is one, value 2
 * (bit 1) when every bit is zero, and 0 otherwise.
 */
static void do_vcmp_rc(int vrt)
{
    TCGv_i64 dword, all_ones, all_zero;

    dword = tcg_temp_new_i64();
    all_ones = tcg_temp_new_i64();
    all_zero = tcg_temp_new_i64();

    /* OR and AND of the two doublewords summarise all 128 bits. */
    get_avr64(dword, vrt, true);
    tcg_gen_mov_i64(all_ones, dword);
    get_avr64(dword, vrt, false);
    tcg_gen_or_i64(all_zero, all_ones, dword);
    tcg_gen_and_i64(all_ones, all_ones, dword);

    /* OR == 0 means no bit is set anywhere. */
    tcg_gen_setcondi_i64(TCG_COND_EQ, all_zero, all_zero, 0);
    tcg_gen_shli_i64(all_zero, all_zero, 1);

    /* AND == -1 means every bit is set. */
    tcg_gen_setcondi_i64(TCG_COND_EQ, all_ones, all_ones, -1);
    tcg_gen_shli_i64(all_ones, all_ones, 3);

    tcg_gen_or_i64(dword, all_ones, all_zero);
    tcg_gen_extrl_i64_i32(cpu_crf[6], dword);

    tcg_temp_free_i64(dword);
    tcg_temp_free_i64(all_ones);
    tcg_temp_free_i64(all_zero);
}

/*
 * Translate an element-wise vector compare.
 *
 * @cond: TCG condition applied to each pair of elements (vra, vrb)
 * @vece: element size
 *
 * The compare runs over the full 16-byte registers and writes vrt.  When
 * the record bit of the instruction is set, CR field 6 is updated from the
 * result via do_vcmp_rc().
 */
static bool do_vcmp(DisasContext *ctx, arg_VC *a, TCGCond cond, int vece)
{
    uint32_t dst_ofs, lhs_ofs, rhs_ofs;

    REQUIRE_VECTOR(ctx);

    dst_ofs = avr_full_offset(a->vrt);
    lhs_ofs = avr_full_offset(a->vra);
    rhs_ofs = avr_full_offset(a->vrb);

    tcg_gen_gvec_cmp(cond, vece, dst_ofs, lhs_ofs, rhs_ofs, 16, 16);

    if (a->rc) {
        do_vcmp_rc(a->vrt);
    }

    return true;
}

/*
 * Element-wise integer compares, all implemented by do_vcmp() with the
 * matching TCG condition and element size.
 */

/* Equal */
TRANS_FLAGS(ALTIVEC, VCMPEQUB, do_vcmp, TCG_COND_EQ, MO_8)
TRANS_FLAGS(ALTIVEC, VCMPEQUH, do_vcmp, TCG_COND_EQ, MO_16)
TRANS_FLAGS(ALTIVEC, VCMPEQUW, do_vcmp, TCG_COND_EQ, MO_32)
TRANS_FLAGS2(ALTIVEC_207, VCMPEQUD, do_vcmp, TCG_COND_EQ, MO_64)

/* Greater than: signed (TCG_COND_GT), then unsigned (TCG_COND_GTU) */
TRANS_FLAGS(ALTIVEC, VCMPGTSB, do_vcmp, TCG_COND_GT, MO_8)
TRANS_FLAGS(ALTIVEC, VCMPGTSH, do_vcmp, TCG_COND_GT, MO_16)
TRANS_FLAGS(ALTIVEC, VCMPGTSW, do_vcmp, TCG_COND_GT, MO_32)
TRANS_FLAGS2(ALTIVEC_207, VCMPGTSD, do_vcmp, TCG_COND_GT, MO_64)
TRANS_FLAGS(ALTIVEC, VCMPGTUB, do_vcmp, TCG_COND_GTU, MO_8)
TRANS_FLAGS(ALTIVEC, VCMPGTUH, do_vcmp, TCG_COND_GTU, MO_16)
TRANS_FLAGS(ALTIVEC, VCMPGTUW, do_vcmp, TCG_COND_GTU, MO_32)
TRANS_FLAGS2(ALTIVEC_207, VCMPGTUD, do_vcmp, TCG_COND_GTU, MO_64)

/* Not equal */
TRANS_FLAGS2(ISA300, VCMPNEB, do_vcmp, TCG_COND_NE, MO_8)
TRANS_FLAGS2(ISA300, VCMPNEH, do_vcmp, TCG_COND_NE, MO_16)
TRANS_FLAGS2(ISA300, VCMPNEW, do_vcmp, TCG_COND_NE, MO_32)

/*
 * Inline vector expansion of "compare not equal or zero": an element of @t
 * is set when the matching elements of @a and @b differ, or when either of
 * them is zero.
 */
static void gen_vcmpnez_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec a_is_zero, b_is_zero, zero;

    a_is_zero = tcg_temp_new_vec_matching(t);
    b_is_zero = tcg_temp_new_vec_matching(t);
    zero = tcg_constant_vec_matching(t, vece, 0);

    /* Three independent per-element predicates ... */
    tcg_gen_cmp_vec(TCG_COND_EQ, vece, a_is_zero, a, zero);
    tcg_gen_cmp_vec(TCG_COND_EQ, vece, b_is_zero, b, zero);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, t, a, b);

    /* ... merged into the destination. */
    tcg_gen_or_vec(vece, t, t, a_is_zero);
    tcg_gen_or_vec(vece, t, t, b_is_zero);

    tcg_temp_free_vec(a_is_zero);
    tcg_temp_free_vec(b_is_zero);
}

/*
 * Translate VCMPNEZB / VCMPNEZH / VCMPNEZW (compare not equal or zero).
 *
 * @vece selects the descriptor directly (ops[vece]); the table is laid out
 * in the order MO_8, MO_16, MO_32.  Each descriptor pairs the inline
 * expander gen_vcmpnez_vec() with the out-of-line helper of the same element
 * size.  When the record bit is set, CR field 6 is updated from the result
 * via do_vcmp_rc().
 */
static bool do_vcmpnez(DisasContext *ctx, arg_VC *a, int vece)
{
    /* Vector opcodes listed for the inline expansion. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 ops[3] = {
        {
            .fniv = gen_vcmpnez_vec,
            .fno = gen_helper_VCMPNEZB,
            .opt_opc = vecop_list,
            .vece = MO_8
        },
        {
            .fniv = gen_vcmpnez_vec,
            .fno = gen_helper_VCMPNEZH,
            .opt_opc = vecop_list,
            .vece = MO_16
        },
        {
            .fniv = gen_vcmpnez_vec,
            .fno = gen_helper_VCMPNEZW,
            .opt_opc = vecop_list,
            .vece = MO_32
        }
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    /* Operate on the full 16-byte registers. */
    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
                   avr_full_offset(a->vrb), 16, 16, &ops[vece]);

    if (a->rc) {
        do_vcmp_rc(a->vrt);
    }

    return true;
}

/* Byte, halfword and word forms of compare-not-equal-or-zero. */
TRANS(VCMPNEZB, do_vcmpnez, MO_8)
TRANS(VCMPNEZH, do_vcmpnez, MO_16)
TRANS(VCMPNEZW, do_vcmpnez, MO_32)

/*
 * Translate VCMPEQUQ: compare the 128-bit values in vra and vrb for
 * equality.  vrt is set to all ones when they are equal and to all zeros
 * otherwise.  When the record bit is set, CR field 6 becomes 8 for "equal"
 * and 2 for "not equal".
 */
static bool trans_VCMPEQUQ(DisasContext *ctx, arg_VC *a)
{
    TCGv_i64 t0, t1, t2;

    /*
     * Apply the same ISA 3.1 and vector-facility gates as the other quadword
     * compare, do_vcmpq().  Both macros can return from this function, so
     * they run before any temporary is allocated.
     */
    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    t2 = tcg_temp_new_i64();

    /* t2 = difference bits of the high doublewords */
    get_avr64(t0, a->vra, true);
    get_avr64(t1, a->vrb, true);
    tcg_gen_xor_i64(t2, t0, t1);

    /* t1 = difference bits of the low doublewords */
    get_avr64(t0, a->vra, false);
    get_avr64(t1, a->vrb, false);
    tcg_gen_xor_i64(t1, t0, t1);

    /* No difference bit anywhere -> 1 -> negated into an all-ones mask. */
    tcg_gen_or_i64(t1, t1, t2);
    tcg_gen_setcondi_i64(TCG_COND_EQ, t1, t1, 0);
    tcg_gen_neg_i64(t1, t1);

    set_avr64(a->vrt, t1, true);
    set_avr64(a->vrt, t1, false);

    if (a->rc) {
        /*
         * t1 is all ones or all zeros: (t1 & 0xa) ^ 0x2 yields 0x8 for
         * all ones and 0x2 for all zeros.
         */
        tcg_gen_extrl_i64_i32(cpu_crf[6], t1);
        tcg_gen_andi_i32(cpu_crf[6], cpu_crf[6], 0xa);
        tcg_gen_xori_i32(cpu_crf[6], cpu_crf[6], 0x2);
    }

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);

    return true;
}

/*
 * Translate VCMPGTSQ / VCMPGTUQ: 128-bit "greater than" compare of vra
 * against vrb.  vrt is set to all ones when vra > vrb and to all zeros
 * otherwise.  When the record bit is set, CR field 6 becomes 8 for "true"
 * and 2 for "false".
 *
 * @sign: true for a signed compare, false for unsigned.  Only the high
 *        doublewords are compared with the signed condition; the low
 *        doublewords are always compared as unsigned.
 */
static bool do_vcmpgtq(DisasContext *ctx, arg_VC *a, bool sign)
{
    TCGv_i64 t0, t1, t2;

    /*
     * Apply the same ISA 3.1 and vector-facility gates as the other quadword
     * compare, do_vcmpq().  Both macros can return from this function, so
     * they run before any temporary is allocated.
     */
    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    t2 = tcg_temp_new_i64();

    /* t2 = (low(vra) > low(vrb)), unsigned */
    get_avr64(t0, a->vra, false);
    get_avr64(t1, a->vrb, false);
    tcg_gen_setcond_i64(TCG_COND_GTU, t2, t0, t1);

    /* The low compare only counts when the high doublewords are equal. */
    get_avr64(t0, a->vra, true);
    get_avr64(t1, a->vrb, true);
    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t0, t1, t2, tcg_constant_i64(0));
    /* t1 = (high(vra) > high(vrb)), signed or unsigned as requested */
    tcg_gen_setcond_i64(sign ? TCG_COND_GT : TCG_COND_GTU, t1, t0, t1);

    /* Combine both cases and widen 0/1 into an all-zeros/all-ones mask. */
    tcg_gen_or_i64(t1, t1, t2);
    tcg_gen_neg_i64(t1, t1);

    set_avr64(a->vrt, t1, true);
    set_avr64(a->vrt, t1, false);

    if (a->rc) {
        /*
         * t1 is all ones or all zeros: (t1 & 0xa) ^ 0x2 yields 0x8 for
         * all ones and 0x2 for all zeros.
         */
        tcg_gen_extrl_i64_i32(cpu_crf[6], t1);
        tcg_gen_andi_i32(cpu_crf[6], cpu_crf[6], 0xa);
        tcg_gen_xori_i32(cpu_crf[6], cpu_crf[6], 0x2);
    }

    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);
    tcg_temp_free_i64(t2);

    return true;
}

/* Signed and unsigned quadword greater-than; the argument is 'sign'. */
TRANS(VCMPGTSQ, do_vcmpgtq, true)
TRANS(VCMPGTUQ, do_vcmpgtq, false)

/*
 * Translate VCMPSQ / VCMPUQ: three-way compare of the 128-bit values in vra
 * and vrb, writing CRF_GT, CRF_LT or CRF_EQ to CR field a->bf.
 *
 * @sign: true for a signed compare, false for unsigned.  Only the high
 *        doublewords use the signed conditions; the low doublewords are
 *        always compared as unsigned.
 */
static bool do_vcmpq(DisasContext *ctx, arg_VX_bf *a, bool sign)
{
    TCGv_i64 lhs, rhs;
    TCGLabel *is_gt, *is_lt, *out;
    TCGCond hi_gt = sign ? TCG_COND_GT : TCG_COND_GTU;
    TCGCond hi_lt = sign ? TCG_COND_LT : TCG_COND_LTU;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    /* Local temporaries, as the generated code contains branches. */
    lhs = tcg_temp_local_new_i64();
    rhs = tcg_temp_local_new_i64();
    is_gt = gen_new_label();
    is_lt = gen_new_label();
    out = gen_new_label();

    /* The high doublewords decide unless they are equal. */
    get_avr64(lhs, a->vra, true);
    get_avr64(rhs, a->vrb, true);
    tcg_gen_brcond_i64(hi_gt, lhs, rhs, is_gt);
    tcg_gen_brcond_i64(hi_lt, lhs, rhs, is_lt);

    /* Tie on the high half: fall back to the low doublewords. */
    get_avr64(lhs, a->vra, false);
    get_avr64(rhs, a->vrb, false);
    tcg_gen_brcond_i64(TCG_COND_GTU, lhs, rhs, is_gt);
    tcg_gen_brcond_i64(TCG_COND_LTU, lhs, rhs, is_lt);

    /* Neither branch taken: the two values are equal. */
    tcg_gen_movi_i32(cpu_crf[a->bf], CRF_EQ);
    tcg_gen_br(out);

    gen_set_label(is_gt);
    tcg_gen_movi_i32(cpu_crf[a->bf], CRF_GT);
    tcg_gen_br(out);

    gen_set_label(is_lt);
    tcg_gen_movi_i32(cpu_crf[a->bf], CRF_LT);
    tcg_gen_br(out);

    gen_set_label(out);
    tcg_temp_free_i64(lhs);
    tcg_temp_free_i64(rhs);

    return true;
}

/* Signed and unsigned quadword three-way compare; the argument is 'sign'. */
TRANS(VCMPSQ, do_vcmpq, true)
TRANS(VCMPUQ, do_vcmpq, false)

/*
 * Floating-point compares.  Each GEN_VXRFORM() defines the plain translator
 * gen_<name>() and the record-form translator gen_<name>_().
 */
GEN_VXRFORM(vcmpeqfp, 3, 3)
GEN_VXRFORM(vcmpgefp, 3, 7)
GEN_VXRFORM(vcmpgtfp, 3, 11)
GEN_VXRFORM(vcmpbfp, 3, 15)

/*
 * Common body of the vspltis{b,h,w} translators: replicate the immediate
 * extracted by SIMM5() into every @vece-sized element of the 16-byte
 * destination register.  Raises POWERPC_EXCP_VPU when Altivec is disabled.
 */
static void gen_vsplti(DisasContext *ctx, int vece)
{
    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
    } else {
        int imm = SIMM5(ctx->opcode);
        uint32_t dst_ofs = avr_full_offset(rD(ctx->opcode));

        tcg_gen_gvec_dup_imm(vece, dst_ofs, 16, 16, imm);
    }
}

/*
 * GEN_VXFORM_VSPLTI - define gen_<name>() as a thin wrapper that calls
 * gen_vsplti() with a fixed element size.  opc2 and opc3 are not used by
 * the expansion.
 */
#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); }

/* Byte, halfword and word splat-immediate translators. */
GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12);
GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13);
GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14);

/*
 * GEN_VXFORM_NOA - define gen_<name>() for a VX-form instruction that has no
 * rA operand and whose helper does not take cpu_env.
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise it calls gen_helper_<name>(rd, rb) with pointers to the
 * destination and source vector registers and frees the pointer temporaries
 * afterwards.  opc2 and opc3 are not used by the expansion.
 */
#define GEN_VXFORM_NOA(name, opc2, opc3)                                \
static void glue(gen_, name)(DisasContext *ctx)                         \
    {                                                                   \
        TCGv_ptr rb, rd;                                                \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        rd = gen_avr_ptr(rD(ctx->opcode));                              \
        gen_helper_##name(rd, rb);                                      \
        tcg_temp_free_ptr(rb);                                          \
        tcg_temp_free_ptr(rd);                                          \
    }

/*
 * Same shape as GEN_VXFORM_NOA, except that the helper additionally receives
 * cpu_env as its first argument: gen_helper_<name>(cpu_env, rd, rb).
 * opc2/opc3 are not used by the expansion.
 */
#define GEN_VXFORM_NOA_ENV(name, opc2, opc3)                            \
static void glue(gen_, name)(DisasContext *ctx)                         \
    {                                                                   \
        TCGv_ptr rb, rd;                                                \
                                                                        \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        rd = gen_avr_ptr(rD(ctx->opcode));                              \
        gen_helper_##name(cpu_env, rd, rb);                             \
        tcg_temp_free_ptr(rb);                                          \
        tcg_temp_free_ptr(rd);                                          \
    }

/*
 * Variant of GEN_VXFORM_NOA that accepts one more opcode argument (opc4).
 * The generated function body is the same: gen_helper_<name>(rd, rb) guarded
 * by the AltiVec-enabled check. None of opc2/opc3/opc4 is used by the
 * expansion.
 */
#define GEN_VXFORM_NOA_2(name, opc2, opc3, opc4)                        \
static void glue(gen_, name)(DisasContext *ctx)                         \
    {                                                                   \
        TCGv_ptr rb, rd;                                                \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        rd = gen_avr_ptr(rD(ctx->opcode));                              \
        gen_helper_##name(rd, rb);                                      \
        tcg_temp_free_ptr(rb);                                          \
        tcg_temp_free_ptr(rd);                                          \
    }

/*
 * Define gen_<name>() for an instruction that reads vector register rB and
 * delivers its result to a general-purpose register: the helper is called as
 * gen_helper_<name>(cpu_gpr[rD], rb). Raises POWERPC_EXCP_VPU when AltiVec
 * is disabled. None of opc2/opc3/opc4 is used by the expansion.
 */
#define GEN_VXFORM_NOA_3(name, opc2, opc3, opc4)                        \
static void glue(gen_, name)(DisasContext *ctx)                         \
    {                                                                   \
        TCGv_ptr rb;                                                    \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        gen_helper_##name(cpu_gpr[rD(ctx->opcode)], rb);                \
        tcg_temp_free_ptr(rb);                                          \
    }
/*
 * One-source-operand generators. The numeric opcode arguments are ignored by
 * the GEN_VXFORM_NOA* expansions above, so repeated values (e.g. for the
 * vprtyb* entries) do not clash here.
 */
/* Helpers called as (rd, rb) */
GEN_VXFORM_NOA(vupkhsb, 7, 8);
GEN_VXFORM_NOA(vupkhsh, 7, 9);
GEN_VXFORM_NOA(vupkhsw, 7, 25);
GEN_VXFORM_NOA(vupklsb, 7, 10);
GEN_VXFORM_NOA(vupklsh, 7, 11);
GEN_VXFORM_NOA(vupklsw, 7, 27);
GEN_VXFORM_NOA(vupkhpx, 7, 13);
GEN_VXFORM_NOA(vupklpx, 7, 15);
/* Helpers called as (cpu_env, rd, rb) */
GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
GEN_VXFORM_NOA_ENV(vlogefp, 5, 7);
GEN_VXFORM_NOA_ENV(vrfim, 5, 11);
GEN_VXFORM_NOA_ENV(vrfin, 5, 8);
GEN_VXFORM_NOA_ENV(vrfip, 5, 10);
GEN_VXFORM_NOA_ENV(vrfiz, 5, 9);
/* Helpers called as (rd, rb) */
GEN_VXFORM_NOA(vprtybw, 1, 24);
GEN_VXFORM_NOA(vprtybd, 1, 24);
GEN_VXFORM_NOA(vprtybq, 1, 24);

/*
 * Shared body of the vsplt{b,h,w} generators: replicate one @vece-sized
 * element of VRB, selected by the UIMM5 field, across all of VRD.
 *
 * Raises POWERPC_EXCP_VPU and emits nothing else when AltiVec is disabled.
 */
static void gen_vsplt(DisasContext *ctx, int vece)
{
    int sel, src_ofs, dst_ofs;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    sel = UIMM5(ctx->opcode);
    dst_ofs = avr_full_offset(rD(ctx->opcode));

    /*
     * Byte offset of the selected element inside VRB. Experimental testing
     * shows that hardware masks the immediate, hence the "& 15".
     */
    src_ofs = avr_full_offset(rB(ctx->opcode)) + ((sel << vece) & 15);
#if !HOST_BIG_ENDIAN
    /* Mirror the byte offset, then realign it to the element size. */
    src_ofs = (src_ofs ^ 15) & ~((1 << vece) - 1);
#endif

    tcg_gen_gvec_dup_mem(vece, dst_ofs, src_ofs, 16, 16);
}

/*
 * Define gen_<name>() as a thin wrapper that calls gen_vsplt() with a fixed
 * element size @vece. The opc2/opc3 arguments are not used by the expansion.
 */
#define GEN_VXFORM_VSPLT(name, vece, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) { gen_vsplt(ctx, vece); }

/*
 * Define gen_<name>() for an instruction with one vector source (rB), a
 * vector destination (rD) and a UIMM5 immediate. The helper is called as
 * gen_helper_<name>(cpu_env, rd, rb, uimm), with the immediate passed as a
 * 32-bit TCG constant. Raises POWERPC_EXCP_VPU when AltiVec is disabled.
 * opc2/opc3 are not used by the expansion.
 */
#define GEN_VXFORM_UIMM_ENV(name, opc2, opc3)                           \
static void glue(gen_, name)(DisasContext *ctx)                         \
    {                                                                   \
        TCGv_ptr rb, rd;                                                \
        TCGv_i32 uimm;                                                  \
                                                                        \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        uimm = tcg_const_i32(UIMM5(ctx->opcode));                       \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        rd = gen_avr_ptr(rD(ctx->opcode));                              \
        gen_helper_##name(cpu_env, rd, rb, uimm);                       \
        tcg_temp_free_i32(uimm);                                        \
        tcg_temp_free_ptr(rb);                                          \
        tcg_temp_free_ptr(rd);                                          \
    }

/*
 * Define gen_<name>() for an instruction with one vector source (rB), a
 * vector destination (rD) and a UIMM4 immediate. An immediate greater than
 * @splat_max is replaced by 0 before being handed to the helper, which is
 * called as gen_helper_<name>(rd, rb, uimm). Raises POWERPC_EXCP_VPU when
 * AltiVec is disabled. opc2/opc3 are not used by the expansion.
 */
#define GEN_VXFORM_UIMM_SPLAT(name, opc2, opc3, splat_max)              \
static void glue(gen_, name)(DisasContext *ctx)                         \
    {                                                                   \
        TCGv_ptr rb, rd;                                                \
        uint8_t uimm = UIMM4(ctx->opcode);                              \
        TCGv_i32 t0;                                                    \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        if (uimm > splat_max) {                                         \
            uimm = 0;                                                   \
        }                                                               \
        t0 = tcg_temp_new_i32();                                        \
        tcg_gen_movi_i32(t0, uimm);                                     \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        rd = gen_avr_ptr(rD(ctx->opcode));                              \
        gen_helper_##name(rd, rb, t0);                                  \
        tcg_temp_free_i32(t0);                                          \
        tcg_temp_free_ptr(rb);                                          \
        tcg_temp_free_ptr(rd);                                          \
    }

/* Element splats: byte, halfword and word. */
GEN_VXFORM_VSPLT(vspltb, MO_8, 6, 8);
GEN_VXFORM_VSPLT(vsplth, MO_16, 6, 9);
GEN_VXFORM_VSPLT(vspltw, MO_32, 6, 10);
/* Element extracts; the last argument is the largest accepted UIMM4 value. */
GEN_VXFORM_UIMM_SPLAT(vextractub, 6, 8, 15);
GEN_VXFORM_UIMM_SPLAT(vextractuh, 6, 9, 14);
GEN_VXFORM_UIMM_SPLAT(vextractuw, 6, 10, 12);
GEN_VXFORM_UIMM_SPLAT(vextractd, 6, 11, 8);
/* Helpers taking (cpu_env, rd, rb, uimm5). */
GEN_VXFORM_UIMM_ENV(vcfux, 5, 12);
GEN_VXFORM_UIMM_ENV(vcfsx, 5, 13);
GEN_VXFORM_UIMM_ENV(vctuxs, 5, 14);
GEN_VXFORM_UIMM_ENV(vctsxs, 5, 15);
/*
 * Pair each vsplt* generator with the vextractu* generator that is given the
 * same opc2/opc3 values above. GEN_VXFORM_DUAL is defined outside this
 * section; presumably it selects between the two based on the listed
 * instruction flags -- confirm against its definition.
 */
GEN_VXFORM_DUAL(vspltb, PPC_ALTIVEC, PPC_NONE,
                vextractub, PPC_NONE, PPC2_ISA300);
GEN_VXFORM_DUAL(vsplth, PPC_ALTIVEC, PPC_NONE,
                vextractuh, PPC_NONE, PPC2_ISA300);
GEN_VXFORM_DUAL(vspltw, PPC_ALTIVEC, PPC_NONE,
                vextractuw, PPC_NONE, PPC2_ISA300);

/*
 * VGNB: starting from the most significant bit of VRB, gather every Nth bit
 * and place the result, left-justified, in GPR[RT].
 *
 * Returns true once the instruction has been handled. For N < 2 no code is
 * generated and RT is left unchanged.
 */
static bool trans_VGNB(DisasContext *ctx, arg_VX_n *a)
{
    /*
     * Similar to do_vextractm, we'll use a sequence of mask-shift-or operations
     * to gather the bits. The masks can be created with
     *
     * uint64_t mask(uint64_t n, uint64_t step)
     * {
     *     uint64_t p = ((1UL << (1UL << step)) - 1UL) << ((n - 1UL) << step),
     *                  plen = n << step, m = 0;
     *     for(int i = 0; i < 64/plen; i++) {
     *         m |= p;
     *         m = ror64(m, plen);
     *     }
     *     p >>= plen * DIV_ROUND_UP(64, plen) - 64;
     *     return m | p;
     * }
     *
     * But since there are few values of N, we'll use a lookup table to avoid
     * these calculations at runtime.
     */
    static const uint64_t mask[6][5] = {
        {
            0xAAAAAAAAAAAAAAAAULL, 0xccccccccccccccccULL, 0xf0f0f0f0f0f0f0f0ULL,
            0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL
        },
        {
            0x9249249249249249ULL, 0xC30C30C30C30C30CULL, 0xF00F00F00F00F00FULL,
            0xFF0000FF0000FF00ULL, 0xFFFF00000000FFFFULL
        },
        {
            /* For N >= 4, some mask operations can be elided */
            0x8888888888888888ULL, 0, 0xf000f000f000f000ULL, 0,
            0xFFFF000000000000ULL
        },
        {
            0x8421084210842108ULL, 0, 0xF0000F0000F0000FULL, 0, 0
        },
        {
            0x8208208208208208ULL, 0, 0xF00000F00000F000ULL, 0, 0
        },
        {
            0x8102040810204081ULL, 0, 0xF000000F000000F0ULL, 0, 0
        }
    };
    uint64_t m;
    int i, sh, nbits;
    TCGv_i64 hi, lo, t0, t1;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    if (a->n < 2) {
        /*
         * "N can be any value between 2 and 7, inclusive." Otherwise, the
         * result is undefined, so we don't need to change RT. Also, N > 7 is
         * impossible since the immediate field is 3 bits only.
         */
        return true;
    }

    /*
     * Number of bits gathered from each doubleword. This must only be
     * computed once N is known to be non-zero: DIV_ROUND_UP(64, 0) would
     * divide by zero on the host.
     */
    nbits = DIV_ROUND_UP(64, a->n);

    hi = tcg_temp_new_i64();
    lo = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    get_avr64(hi, a->vrb, true);
    get_avr64(lo, a->vrb, false);

    /* Align the lower doubleword so we can use the same mask */
    tcg_gen_shli_i64(lo, lo, a->n * nbits - 64);

    /*
     * Starting from the most significant bit, gather every Nth bit with a
     * sequence of mask-shift-or operation. E.g.: for N=3
     * AxxBxxCxxDxxExxFxxGxxHxxIxxJxxKxxLxxMxxNxxOxxPxxQxxRxxSxxTxxUxxV
     *     & rep(0b100)
     * A..B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V
     *     << 2
     * .B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V..
     *     |
     * AB.BC.CD.DE.EF.FG.GH.HI.IJ.JK.KL.LM.MN.NO.OP.PQ.QR.RS.ST.TU.UV.V
     *  & rep(0b110000)
     * AB....CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV..
     *     << 4
     * ..CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV......
     *     |
     * ABCD..CDEF..EFGH..GHIJ..IJKL..KLMN..MNOP..OPQR..QRST..STUV..UV..
     *     & rep(0b111100000000)
     * ABCD........EFGH........IJKL........MNOP........QRST........UV..
     *     << 8
     * ....EFGH........IJKL........MNOP........QRST........UV..........
     *     |
     * ABCDEFGH....EFGHIJKL....IJKLMNOP....MNOPQRST....QRSTUV......UV..
     *  & rep(0b111111110000000000000000)
     * ABCDEFGH................IJKLMNOP................QRSTUV..........
     *     << 16
     * ........IJKLMNOP................QRSTUV..........................
     *     |
     * ABCDEFGHIJKLMNOP........IJKLMNOPQRSTUV..........QRSTUV..........
     *     & rep(0b111111111111111100000000000000000000000000000000)
     * ABCDEFGHIJKLMNOP................................QRSTUV..........
     *     << 32
     * ................QRSTUV..........................................
     *     |
     * ABCDEFGHIJKLMNOPQRSTUV..........................QRSTUV..........
     */
    for (i = 0, sh = a->n - 1; i < 5; i++, sh <<= 1) {
        /* A zero table entry means the masking step can be skipped. */
        m = mask[a->n - 2][i];
        if (m) {
            tcg_gen_andi_i64(hi, hi, m);
            tcg_gen_andi_i64(lo, lo, m);
        }
        /* A shift of 64 or more would move every bit out; nothing to merge. */
        if (sh < 64) {
            tcg_gen_shli_i64(t0, hi, sh);
            tcg_gen_shli_i64(t1, lo, sh);
            tcg_gen_or_i64(hi, t0, hi);
            tcg_gen_or_i64(lo, t1, lo);
        }
    }

    /* Keep the top nbits of each half and append lo's bits right after hi's. */
    tcg_gen_andi_i64(hi, hi, ~(~0ULL >> nbits));
    tcg_gen_andi_i64(lo, lo, ~(~0ULL >> nbits));
    tcg_gen_shri_i64(lo, lo, nbits);
    tcg_gen_or_i64(hi, hi, lo);
    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], hi);

    tcg_temp_free_i64(hi);
    tcg_temp_free_i64(lo);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);

    return true;
}

/*
 * Common translator for the VEXTD*V[LR]X family.
 *
 * The index comes from the low five bits of GPR[RC]. For the right-indexed
 * forms (@right) it is rewritten as (32 - @size) - index, so the same helper
 * can serve both directions. The helper receives cpu_env, pointers to VRT,
 * VRA and VRB, and the final index.
 */
static bool do_vextdx(DisasContext *ctx, arg_VA *a, int size, bool right,
               void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv))
{
    TCGv_ptr dst, src_a, src_b;
    TCGv index;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    dst = gen_avr_ptr(a->vrt);
    src_a = gen_avr_ptr(a->vra);
    src_b = gen_avr_ptr(a->vrb);
    index = tcg_temp_new();

    tcg_gen_andi_tl(index, cpu_gpr[a->rc], 0x1F);
    if (right) {
        /* index = (32 - size) - index */
        tcg_gen_subfi_tl(index, 32 - size, index);
    }

    gen_helper(cpu_env, dst, src_a, src_b, index);

    tcg_temp_free_ptr(dst);
    tcg_temp_free_ptr(src_a);
    tcg_temp_free_ptr(src_b);
    tcg_temp_free(index);

    return true;
}

/* Left-indexed extracts; the numeric argument is the element size in bytes. */
TRANS(VEXTDUBVLX, do_vextdx, 1, false, gen_helper_VEXTDUBVLX)
TRANS(VEXTDUHVLX, do_vextdx, 2, false, gen_helper_VEXTDUHVLX)
TRANS(VEXTDUWVLX, do_vextdx, 4, false, gen_helper_VEXTDUWVLX)
TRANS(VEXTDDVLX, do_vextdx, 8, false, gen_helper_VEXTDDVLX)

/*
 * Right-indexed extracts reuse the left-indexed helpers: do_vextdx converts
 * the index when its "right" argument is true.
 */
TRANS(VEXTDUBVRX, do_vextdx, 1, true, gen_helper_VEXTDUBVLX)
TRANS(VEXTDUHVRX, do_vextdx, 2, true, gen_helper_VEXTDUHVLX)
TRANS(VEXTDUWVRX, do_vextdx, 4, true, gen_helper_VEXTDUWVLX)
TRANS(VEXTDDVRX, do_vextdx, 8, true, gen_helper_VEXTDDVLX)

/*
 * Core of the vector insert translators: call @gen_helper with cpu_env, a
 * pointer to VRT, the 64-bit value @rb to insert and the insertion index.
 *
 * The index is the low four bits of @ra. For the right-indexed forms
 * (@right) it is rewritten as (16 - @size) - index so that a single helper
 * covers both directions. This function performs no instruction-flag or
 * vector-enable checks; its callers do that.
 */
static bool do_vinsx(DisasContext *ctx, int vrt, int size, bool right, TCGv ra,
            TCGv_i64 rb, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv_ptr dst = gen_avr_ptr(vrt);
    TCGv pos = tcg_temp_new();

    tcg_gen_andi_tl(pos, ra, 0xF);
    if (right) {
        /* pos = (16 - size) - pos */
        tcg_gen_subfi_tl(pos, 16 - size, pos);
    }

    gen_helper(cpu_env, dst, rb, pos);

    tcg_temp_free_ptr(dst);
    tcg_temp_free(pos);

    return true;
}

/*
 * Vector-source flavour of do_vinsx(): the value to insert is the high
 * doubleword of vector register @vrb. All other arguments are forwarded
 * unchanged, and do_vinsx()'s result is returned.
 */
static bool do_vinsvx(DisasContext *ctx, int vrt, int size, bool right, TCGv ra,
                int vrb, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv_i64 src_hi = tcg_temp_new_i64();
    bool handled;

    get_avr64(src_hi, vrb, true);
    handled = do_vinsx(ctx, vrt, size, right, ra, src_hi, gen_helper);
    tcg_temp_free_i64(src_hi);

    return handled;
}

/*
 * VX-form insert taking both the value and the index from general-purpose
 * registers: the value is GPR[a->vrb] zero-extended to 64 bits and the index
 * is GPR[a->vra]. Requires ISA 3.1 and the vector facility.
 */
static bool do_vinsx_VX(DisasContext *ctx, arg_VX *a, int size, bool right,
                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv_i64 src;
    bool handled;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    src = tcg_temp_new_i64();
    tcg_gen_extu_tl_i64(src, cpu_gpr[a->vrb]);

    handled = do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], src,
                       gen_helper);

    tcg_temp_free_i64(src);

    return handled;
}

/*
 * VX-form insert taking the value from vector register a->vrb (through
 * do_vinsvx()) and the index from GPR[a->vra]. Requires ISA 3.1 and the
 * vector facility.
 */
static bool do_vinsvx_VX(DisasContext *ctx, arg_VX *a, int size, bool right,
                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv index;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    index = cpu_gpr[a->vra];

    return do_vinsvx(ctx, a->vrt, size, right, index, a->vrb, gen_helper);
}

/*
 * VINSW/VINSD: insert the zero-extended value of GPR[a->vrb] into VRT at the
 * immediate index a->uim, always in the left-indexed direction. Requires
 * ISA 3.1 and the vector facility.
 *
 * An index above 16 - @size is logged as a guest error and the instruction
 * then generates no code.
 */
static bool do_vins_VX_uim4(DisasContext *ctx, arg_VX_uim4 *a, int size,
                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv_i64 src;
    bool handled;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    if (a->uim <= (16 - size)) {
        src = tcg_temp_new_i64();
        tcg_gen_extu_tl_i64(src, cpu_gpr[a->vrb]);

        handled = do_vinsx(ctx, a->vrt, size, false, tcg_constant_tl(a->uim),
                           src, gen_helper);

        tcg_temp_free_i64(src);
        return handled;
    }

    /*
     * PowerISA v3.1 leaves the result undefined for an out-of-range index,
     * so we log a guest error and keep VRT as it is. Real hardware performs
     * a partial insert instead: with VRT zeroed and RB = 0x12345678,
     * "vinsw VRT,RB,14" yields VRT = 0x0000...00001234. We do not reproduce
     * that, as software shouldn't rely on it.
     */
    qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINS* at"
        " 0x" TARGET_FMT_lx ", UIM = %d > %d\n", ctx->cia, a->uim,
        16 - size);
    return true;
}

/*
 * VINSERT{B,H,W,D}: insert the high doubleword of VRB (through do_vinsvx())
 * into VRT at the immediate index a->uim, always in the left-indexed
 * direction. Requires ISA 3.0 and the vector facility.
 *
 * An index above 16 - @size is logged as a guest error and the instruction
 * then generates no code.
 */
static bool do_vinsert_VX_uim4(DisasContext *ctx, arg_VX_uim4 *a, int size,
                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    int last_valid;

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    last_valid = 16 - size;
    if (a->uim > last_valid) {
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINSERT* at"
            " 0x" TARGET_FMT_lx ", UIM = %d > %d\n", ctx->cia, a->uim,
            last_valid);
        return true;
    }

    return do_vinsvx(ctx, a->vrt, size, false, tcg_constant_tl(a->uim), a->vrb,
                     gen_helper);
}

/*
 * Vector insert instantiations. The numeric argument is the element size in
 * bytes. All right-indexed forms reuse the left-indexed helpers, because
 * do_vinsx() converts the index when "right" is true.
 */

/* Value and index from GPRs, left-indexed. */
TRANS(VINSBLX, do_vinsx_VX, 1, false, gen_helper_VINSBLX)
TRANS(VINSHLX, do_vinsx_VX, 2, false, gen_helper_VINSHLX)
TRANS(VINSWLX, do_vinsx_VX, 4, false, gen_helper_VINSWLX)
TRANS(VINSDLX, do_vinsx_VX, 8, false, gen_helper_VINSDLX)

/* Value and index from GPRs, right-indexed. */
TRANS(VINSBRX, do_vinsx_VX, 1, true, gen_helper_VINSBLX)
TRANS(VINSHRX, do_vinsx_VX, 2, true, gen_helper_VINSHLX)
TRANS(VINSWRX, do_vinsx_VX, 4, true, gen_helper_VINSWLX)
TRANS(VINSDRX, do_vinsx_VX, 8, true, gen_helper_VINSDLX)

/* Value from a GPR, immediate index. */
TRANS(VINSW, do_vins_VX_uim4, 4, gen_helper_VINSWLX)
TRANS(VINSD, do_vins_VX_uim4, 8, gen_helper_VINSDLX)

/* Value from a vector register, index from a GPR, left-indexed. */
TRANS(VINSBVLX, do_vinsvx_VX, 1, false, gen_helper_VINSBLX)
TRANS(VINSHVLX, do_vinsvx_VX, 2, false, gen_helper_VINSHLX)
TRANS(VINSWVLX, do_vinsvx_VX, 4, false, gen_helper_VINSWLX)

/* Value from a vector register, index from a GPR, right-indexed. */
TRANS(VINSBVRX, do_vinsvx_VX, 1, true, gen_helper_VINSBLX)
TRANS(VINSHVRX, do_vinsvx_VX, 2, true, gen_helper_VINSHLX)
TRANS(VINSWVRX, do_vinsvx_VX, 4, true, gen_helper_VINSWLX)

/* Value from a vector register, immediate index (ISA 3.0 check). */
TRANS(VINSERTB, do_vinsert_VX_uim4, 1, gen_helper_VINSBLX)
TRANS(VINSERTH, do_vinsert_VX_uim4, 2, gen_helper_VINSHLX)
TRANS(VINSERTW, do_vinsert_VX_uim4, 4, gen_helper_VINSWLX)
TRANS(VINSERTD, do_vinsert_VX_uim4, 8, gen_helper_VINSDLX)

/*
 * vsldoi: call the vsldoi helper with pointers to VRD, VRA and VRB and the
 * shift amount taken from the opcode's VSH field.
 *
 * Raises POWERPC_EXCP_VPU and emits nothing else when AltiVec is disabled.
 */
static void gen_vsldoi(DisasContext *ctx)
{
    TCGv_ptr src_a, src_b, dst;
    TCGv_i32 shift;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    src_a = gen_avr_ptr(rA(ctx->opcode));
    src_b = gen_avr_ptr(rB(ctx->opcode));
    dst = gen_avr_ptr(rD(ctx->opcode));
    shift = tcg_const_i32(VSH(ctx->opcode));

    gen_helper_vsldoi(dst, src_a, src_b, shift);

    tcg_temp_free_ptr(src_a);
    tcg_temp_free_ptr(src_b);
    tcg_temp_free_ptr(dst);
    tcg_temp_free_i32(shift);
}

/*
 * VSLDBI: shift VRA left by a->sh bits, filling the vacated low-order bits
 * with the most significant bits of VRB, and store the result in VRT.
 *
 * With a->sh == 0 this is a plain copy of VRA, so VRB is not read at all.
 */
static bool trans_VSLDBI(DisasContext *ctx, arg_VN *a)
{
    TCGv_i64 hi, lo, fill;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    hi = tcg_temp_new_i64();
    lo = tcg_temp_new_i64();

    get_avr64(hi, a->vra, true);
    get_avr64(lo, a->vra, false);

    if (a->sh) {
        fill = tcg_temp_new_i64();

        /* Only the top doubleword of VRB can contribute bits. */
        get_avr64(fill, a->vrb, true);

        /* hi = (hi:lo) >> (64 - sh), then lo = (lo:fill) >> (64 - sh) */
        tcg_gen_extract2_i64(hi, lo, hi, 64 - a->sh);
        tcg_gen_extract2_i64(lo, fill, lo, 64 - a->sh);

        tcg_temp_free_i64(fill);
    }

    set_avr64(a->vrt, hi, true);
    set_avr64(a->vrt, lo, false);

    tcg_temp_free_i64(hi);
    tcg_temp_free_i64(lo);

    return true;
}

/*
 * VSRDBI: shift VRB right by a->sh bits, filling the vacated high-order bits
 * with the least significant bits of VRA, and store the result in VRT.
 *
 * With a->sh == 0 this is a plain copy of VRB, so VRA is not read at all.
 */
static bool trans_VSRDBI(DisasContext *ctx, arg_VN *a)
{
    TCGv_i64 lo, hi, fill;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    lo = tcg_temp_new_i64();
    hi = tcg_temp_new_i64();

    get_avr64(lo, a->vrb, false);
    get_avr64(hi, a->vrb, true);

    if (a->sh) {
        fill = tcg_temp_new_i64();

        /* Only the bottom doubleword of VRA can contribute bits. */
        get_avr64(fill, a->vra, false);

        /* lo = (hi:lo) >> sh, then hi = (fill:hi) >> sh */
        tcg_gen_extract2_i64(lo, lo, hi, a->sh);
        tcg_gen_extract2_i64(hi, hi, fill, a->sh);

        tcg_temp_free_i64(fill);
    }

    set_avr64(a->vrt, lo, false);
    set_avr64(a->vrt, hi, true);

    tcg_temp_free_i64(lo);
    tcg_temp_free_i64(hi);

    return true;
}

/*
 * VEXPAND{B,H,W,D}M: arithmetic-shift every @vece-sized element of VRB right
 * by its width minus one, so each element of VRT becomes all ones or all
 * zeros according to that element's most significant bit.
 */
static bool do_vexpand(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
{
    int dst_ofs, src_ofs;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    dst_ofs = avr_full_offset(a->vrt);
    src_ofs = avr_full_offset(a->vrb);

    /* (8 << vece) is the element width in bits. */
    tcg_gen_gvec_sari(vece, dst_ofs, src_ofs, (8 << vece) - 1, 16, 16);

    return true;
}

/* Expand-mask translators for byte, halfword, word and doubleword elements */
TRANS(VEXPANDBM, do_vexpand, MO_8)
TRANS(VEXPANDHM, do_vexpand, MO_16)
TRANS(VEXPANDWM, do_vexpand, MO_32)
TRANS(VEXPANDDM, do_vexpand, MO_64)

/*
 * VEXPANDQM: replicate the most significant bit of VRB's high doubleword
 * across the whole of VRT (both doublewords become all ones or all zeros).
 */
static bool trans_VEXPANDQM(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 fill;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    fill = tcg_temp_new_i64();

    /* An arithmetic shift by 63 smears the sign bit over all 64 bits. */
    get_avr64(fill, a->vrb, true);
    tcg_gen_sari_i64(fill, fill, 63);

    set_avr64(a->vrt, fill, false);
    set_avr64(a->vrt, fill, true);

    tcg_temp_free_i64(fill);

    return true;
}

/*
 * VEXTRACT{B,H,W,D}M: collect the most significant bit of every @vece-sized
 * element of VRB into the low-order bits of GPR[a->vrt]. The bits taken from
 * the high doubleword end up above those taken from the low doubleword.
 */
static bool do_vextractm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
{
    /*
     * The per-element MSB mask must be built from a 64-bit one: with a plain
     * int, "1 << 63" (MO_64) is an out-of-range shift and "1 << 31" (MO_32)
     * overflows, both undefined behaviour.
     */
    const uint64_t elem_width = 8 << vece, elem_count_half = 8 >> vece,
                   mask = dup_const(vece, 1ULL << (elem_width - 1));
    uint64_t i, j;
    TCGv_i64 lo, hi, t0, t1;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    hi = tcg_temp_new_i64();
    lo = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    get_avr64(lo, a->vrb, false);
    get_avr64(hi, a->vrb, true);

    tcg_gen_andi_i64(lo, lo, mask);
    tcg_gen_andi_i64(hi, hi, mask);

    /*
     * Gather the most significant bit of each element in the highest
     * element. E.g. for bytes:
     * aXXXXXXXbXXXXXXXcXXXXXXXdXXXXXXXeXXXXXXXfXXXXXXXgXXXXXXXhXXXXXXX
     *     & dup(1 << (elem_width - 1))
     * a0000000b0000000c0000000d0000000e0000000f0000000g0000000h0000000
     *     << 32 - 4
     * 0000e0000000f0000000g0000000h00000000000000000000000000000000000
     *     |
     * a000e000b000f000c000g000d000h000e0000000f0000000g0000000h0000000
     *     << 16 - 2
     * 00c000g000d000h000e0000000f0000000g0000000h000000000000000000000
     *     |
     * a0c0e0g0b0d0f0h0c0e0g000d0f0h000e0g00000f0h00000g0000000h0000000
     *     << 8 - 1
     * 0b0d0f0h0c0e0g000d0f0h000e0g00000f0h00000g0000000h00000000000000
     *     |
     * abcdefghbcdefgh0cdefgh00defgh000efgh0000fgh00000gh000000h0000000
     */
    for (i = elem_count_half / 2, j = 32; i > 0; i >>= 1, j >>= 1) {
        tcg_gen_shli_i64(t0, hi, j - i);
        tcg_gen_shli_i64(t1, lo, j - i);
        tcg_gen_or_i64(hi, hi, t0);
        tcg_gen_or_i64(lo, lo, t1);
    }

    /*
     * The gathered bits now sit in the top elem_count_half bits of each
     * doubleword. Move hi's bits down to bit 0, then shift the hi:lo pair
     * right so that lo's bits land at bit 0 with hi's bits just above them.
     */
    tcg_gen_shri_i64(hi, hi, 64 - elem_count_half);
    tcg_gen_extract2_i64(lo, lo, hi, 64 - elem_count_half);
    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], lo);

    tcg_temp_free_i64(hi);
    tcg_temp_free_i64(lo);
    tcg_temp_free_i64(t0);
    tcg_temp_free_i64(t1);

    return true;
}

/* Extract-mask translators for byte, halfword, word and doubleword elements */
TRANS(VEXTRACTBM, do_vextractm, MO_8)
TRANS(VEXTRACTHM, do_vextractm, MO_16)
TRANS(VEXTRACTWM, do_vextractm, MO_32)
TRANS(VEXTRACTDM, do_vextractm, MO_64)

/*
 * VEXTRACTQM: copy the most significant bit of VRB's high doubleword into
 * bit 0 of GPR[a->vrt]; all other bits of the GPR become zero.
 */
static bool trans_VEXTRACTQM(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 msb;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    msb = tcg_temp_new_i64();

    /* A logical shift by 63 leaves only the top bit, now at bit 0. */
    get_avr64(msb, a->vrb, true);
    tcg_gen_shri_i64(msb, msb, 63);
    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], msb);

    tcg_temp_free_i64(msb);

    return true;
}

/*
 * MTVSR{B,H,W,D}M: expand the low-order bits of GPR[a->vrb] into a vector
 * mask in VRT, one bit per @vece-sized element. Each element becomes all
 * ones or all zeros according to its bit. The upper group of bits feeds the
 * high doubleword and the lower group feeds the low doubleword.
 */
static bool do_mtvsrm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
{
    const uint64_t bits_per_elem = 8 << vece, elems_per_dword = 8 >> vece;
    uint64_t k;
    int step, dist;
    TCGv_i64 msd, lsd, tmp_a, tmp_b;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    msd = tcg_temp_new_i64();
    lsd = tcg_temp_new_i64();
    tmp_a = tcg_temp_new_i64();
    tmp_b = tcg_temp_new_i64();

    /* Split the source bits into one group per doubleword. */
    tcg_gen_extu_tl_i64(tmp_a, cpu_gpr[a->vrb]);
    tcg_gen_extract_i64(msd, tmp_a, elems_per_dword, elems_per_dword);
    tcg_gen_extract_i64(lsd, tmp_a, 0, elems_per_dword);

    /*
     * Spread the bits into their respective elements.
     * E.g. for bytes:
     * 00000000000000000000000000000000000000000000000000000000abcdefgh
     *   << 32 - 4
     * 0000000000000000000000000000abcdefgh0000000000000000000000000000
     *   |
     * 0000000000000000000000000000abcdefgh00000000000000000000abcdefgh
     *   << 16 - 2
     * 00000000000000abcdefgh00000000000000000000abcdefgh00000000000000
     *   |
     * 00000000000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh
     *   << 8 - 1
     * 0000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh0000000
     *   |
     * 0000000abcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgh
     *   & dup(1)
     * 0000000a0000000b0000000c0000000d0000000e0000000f0000000g0000000h
     *   * 0xff
     * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
     */
    for (step = elems_per_dword / 2, dist = 32; step > 0;
         step >>= 1, dist >>= 1) {
        tcg_gen_shli_i64(tmp_a, msd, dist - step);
        tcg_gen_shli_i64(tmp_b, lsd, dist - step);
        tcg_gen_or_i64(msd, msd, tmp_a);
        tcg_gen_or_i64(lsd, lsd, tmp_b);
    }

    /* Keep only the lowest bit of every element ... */
    k = dup_const(vece, 1);
    tcg_gen_andi_i64(msd, msd, k);
    tcg_gen_andi_i64(lsd, lsd, k);

    /* ... and multiply by an all-ones element to fill the element with it. */
    k = MAKE_64BIT_MASK(0, bits_per_elem);
    tcg_gen_muli_i64(msd, msd, k);
    tcg_gen_muli_i64(lsd, lsd, k);

    set_avr64(a->vrt, lsd, false);
    set_avr64(a->vrt, msd, true);

    tcg_temp_free_i64(msd);
    tcg_temp_free_i64(lsd);
    tcg_temp_free_i64(tmp_a);
    tcg_temp_free_i64(tmp_b);

    return true;
}

/* Move-to-mask translators for byte, halfword, word and doubleword elements */
TRANS(MTVSRBM, do_mtvsrm, MO_8)
TRANS(MTVSRHM, do_mtvsrm, MO_16)
TRANS(MTVSRWM, do_mtvsrm, MO_32)
TRANS(MTVSRDM, do_mtvsrm, MO_64)

/*
 * MTVSRQM: fill all 128 bits of VRT with copies of bit 0 of GPR[a->vrb].
 */
static bool trans_MTVSRQM(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 fill;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    fill = tcg_temp_new_i64();

    /* Sign-extracting the 1-bit field at bit 0 gives all ones or zero. */
    tcg_gen_ext_tl_i64(fill, cpu_gpr[a->vrb]);
    tcg_gen_sextract_i64(fill, fill, 0, 1);

    set_avr64(a->vrt, fill, false);
    set_avr64(a->vrt, fill, true);

    tcg_temp_free_i64(fill);

    return true;
}

/*
 * MTVSRBMI: expand the 16-bit immediate a->b into a byte mask in VRT, one
 * immediate bit per byte. Bits 15:8 drive the high doubleword and bits 7:0
 * the low doubleword.
 *
 * The immediate is known at translation time, so both doublewords are
 * computed on the host and stored as constants.
 */
static bool trans_MTVSRBMI(DisasContext *ctx, arg_DX_b *a)
{
    const uint64_t byte_lsb = dup_const(MO_8, 1);
    uint64_t msd, lsd;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    msd = extract16(a->b, 8, 8);
    lsd = extract16(a->b, 0, 8);

    /* Spread the eight bits out so that each lands at bit 0 of its byte. */
    for (int step = 4, dist = 32; step > 0; step >>= 1, dist >>= 1) {
        msd |= msd << (dist - step);
        lsd |= lsd << (dist - step);
    }

    /* Isolate bit 0 of every byte, then widen it to the whole byte. */
    msd &= byte_lsb;
    lsd &= byte_lsb;
    msd *= 0xFF;
    lsd *= 0xFF;

    set_avr64(a->vrt, tcg_constant_i64(msd), true);
    set_avr64(a->vrt, tcg_constant_i64(lsd), false);

    return true;
}

/*
 * VCNTMB{B,H,W,D}: count the @vece-sized elements of VRB whose most
 * significant bit equals a->mp, and place the count in GPR[a->rt] shifted
 * left by TARGET_LONG_BITS - 8 + @vece.
 *
 * Like the other ISA 3.1 vector translators in this file, the instruction
 * is only accepted when ISA 3.1 is available and the vector facility is
 * enabled. The checks come first so that no temporary is allocated on the
 * early-exit paths.
 */
static bool do_vcntmb(DisasContext *ctx, arg_VX_mp *a, int vece)
{
    TCGv_i64 rt, vrb, mask;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    rt = tcg_const_i64(0);
    vrb = tcg_temp_new_i64();
    /* One bit set per element: its most significant bit. */
    mask = tcg_constant_i64(dup_const(vece, 1ULL << ((8 << vece) - 1)));

    /* Accumulate over both doublewords (i = 0, then i = 1). */
    for (int i = 0; i < 2; i++) {
        get_avr64(vrb, a->vrb, i);
        if (a->mp) {
            /* Count elements whose MSB is set. */
            tcg_gen_and_i64(vrb, mask, vrb);
        } else {
            /* Count elements whose MSB is clear: mask & ~vrb. */
            tcg_gen_andc_i64(vrb, mask, vrb);
        }
        tcg_gen_ctpop_i64(vrb, vrb);
        tcg_gen_add_i64(rt, rt, vrb);
    }

    tcg_gen_shli_i64(rt, rt, TARGET_LONG_BITS - 8 + vece);
    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], rt);

    tcg_temp_free_i64(vrb);
    tcg_temp_free_i64(rt);

    return true;
}

/* Count-mask-bits translators for byte, halfword, word and doubleword */
TRANS(VCNTMBB, do_vcntmb, MO_8)
TRANS(VCNTMBH, do_vcntmb, MO_16)
TRANS(VCNTMBW, do_vcntmb, MO_32)
TRANS(VCNTMBD, do_vcntmb, MO_64)

/*
 * Common translator for the VSTRI* family: call @gen_helper with pointers to
 * VRT and VRB. The helper's 32-bit result is written to CR field 6 when the
 * Rc bit is set and thrown away otherwise.
 */
static bool do_vstri(DisasContext *ctx, arg_VX_tb_rc *a,
                     void (*gen_helper)(TCGv_i32, TCGv_ptr, TCGv_ptr))
{
    TCGv_ptr dst, src;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    dst = gen_avr_ptr(a->vrt);
    src = gen_avr_ptr(a->vrb);

    if (!a->rc) {
        /* The helper always produces a value; give it somewhere to go. */
        TCGv_i32 unused = tcg_temp_new_i32();

        gen_helper(unused, dst, src);
        tcg_temp_free_i32(unused);
    } else {
        gen_helper(cpu_crf[6], dst, src);
    }

    tcg_temp_free_ptr(dst);
    tcg_temp_free_ptr(src);

    return true;
}

/* Each VSTRI* instruction has its own helper; do_vstri handles the Rc bit. */
TRANS(VSTRIBL, do_vstri, gen_helper_VSTRIBL)
TRANS(VSTRIBR, do_vstri, gen_helper_VSTRIBR)
TRANS(VSTRIHL, do_vstri, gen_helper_VSTRIHL)
TRANS(VSTRIHR, do_vstri, gen_helper_VSTRIHR)

/*
 * VCLRLB/VCLRRB: copy VRA to VRT, keeping only N bytes and clearing the
 * rest, where N is the value of GPR[a->vrb]. With @right false the N
 * least significant bytes are kept; with @right true the N most significant
 * bytes are kept. N >= 16 keeps the whole register.
 *
 * Like the other ISA 3.1 vector translators in this file, the instruction
 * is only accepted when ISA 3.1 is available and the vector facility is
 * enabled.
 */
static bool do_vclrb(DisasContext *ctx, arg_VX *a, bool right)
{
    TCGv_i64 rb, mh, ml, tmp,
             ones = tcg_constant_i64(-1),
             zero = tcg_constant_i64(0);

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    rb = tcg_temp_new_i64();
    mh = tcg_temp_new_i64();
    ml = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    /*
     * tmp = partial mask for the doubleword in which the keep/clear boundary
     * falls: (N & 7) bytes of ones, at the top for "right" and at the bottom
     * otherwise.
     */
    tcg_gen_extu_tl_i64(rb, cpu_gpr[a->vrb]);
    tcg_gen_andi_i64(tmp, rb, 7);
    tcg_gen_shli_i64(tmp, tmp, 3);
    if (right) {
        tcg_gen_shr_i64(tmp, ones, tmp);
    } else {
        tcg_gen_shl_i64(tmp, ones, tmp);
    }
    tcg_gen_not_i64(tmp, tmp);

    /*
     * Build the per-doubleword masks:
     *   N < 8:       the first doubleword gets the partial mask, the other
     *                is cleared;
     *   8 <= N < 16: the first doubleword is kept whole, the other gets the
     *                partial mask;
     *   N >= 16:     both doublewords are kept whole.
     * "First" is the high doubleword for "right", the low one otherwise.
     */
    if (right) {
        tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(8),
                            tmp, ones);
        tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(8),
                            zero, tmp);
        tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(16),
                            ml, ones);
    } else {
        tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(8),
                            tmp, ones);
        tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(8),
                            zero, tmp);
        tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(16),
                            mh, ones);
    }

    get_avr64(tmp, a->vra, true);
    tcg_gen_and_i64(tmp, tmp, mh);
    set_avr64(a->vrt, tmp, true);

    get_avr64(tmp, a->vra, false);
    tcg_gen_and_i64(tmp, tmp, ml);
    set_avr64(a->vrt, tmp, false);

    tcg_temp_free_i64(rb);
    tcg_temp_free_i64(mh);
    tcg_temp_free_i64(ml);
    tcg_temp_free_i64(tmp);

    return true;
}

/* VCLRLB passes right = false to do_vclrb, VCLRRB passes right = true */
TRANS(VCLRLB, do_vclrb, false)
TRANS(VCLRRB, do_vclrb, true)

/*
 * Define gen_<name0>_<name1>(), a single generator shared by two VA-form
 * instructions that differ only in the opcode's Rc bit: gen_helper_<name1>
 * is used when Rc is set and gen_helper_<name0> otherwise. Both helpers are
 * called as (cpu_env, rd, ra, rb, rc) with pointers to the vector registers.
 * Raises POWERPC_EXCP_VPU when AltiVec is disabled. opc2 is not used by the
 * expansion.
 */
#define GEN_VAFORM_PAIRED(name0, name1, opc2)                           \
static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
    {                                                                   \
        TCGv_ptr ra, rb, rc, rd;                                        \
        if (unlikely(!ctx->altivec_enabled)) {                          \
            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
            return;                                                     \
        }                                                               \
        ra = gen_avr_ptr(rA(ctx->opcode));                              \
        rb = gen_avr_ptr(rB(ctx->opcode));                              \
        rc = gen_avr_ptr(rC(ctx->opcode));                              \
        rd = gen_avr_ptr(rD(ctx->opcode));                              \
        if (Rc(ctx->opcode)) {                                          \
            gen_helper_##name1(cpu_env, rd, ra, rb, rc);                \
        } else {                                                        \
            gen_helper_##name0(cpu_env, rd, ra, rb, rc);                \
        }                                                               \
        tcg_temp_free_ptr(ra);                                          \
        tcg_temp_free_ptr(rb);                                          \
        tcg_temp_free_ptr(rc);                                          \
        tcg_temp_free_ptr(rd);                                          \
    }

/* Generates gen_vmhaddshs_vmhraddshs(). */
GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)

/*
 * Translate vmladduhm: pass pointers to the vector registers named by the
 * rD, rA, rB and rC opcode fields to the vmladduhm helper (which does not
 * take cpu_env).  With AltiVec disabled, only POWERPC_EXCP_VPU is
 * generated.
 */
static void gen_vmladduhm(DisasContext *ctx)
{
    TCGv_ptr src_a, src_b, src_c, dst;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    src_a = gen_avr_ptr(rA(ctx->opcode));
    src_b = gen_avr_ptr(rB(ctx->opcode));
    src_c = gen_avr_ptr(rC(ctx->opcode));
    dst = gen_avr_ptr(rD(ctx->opcode));

    gen_helper_vmladduhm(dst, src_a, src_b, src_c);

    tcg_temp_free_ptr(src_a);
    tcg_temp_free_ptr(src_b);
    tcg_temp_free_ptr(src_c);
    tcg_temp_free_ptr(dst);
}

/*
 * Common translator for VA-form instructions whose helper takes only
 * register pointers (no cpu_env).
 *
 * @ctx:        translation context
 * @a:          decoded operands; vrt is the destination, vra/vrb/rc the
 *              three sources
 * @gen_helper: helper generator, called as (vrt, vra, vrb, rc)
 *
 * Returns true once the helper call has been emitted.  REQUIRE_VECTOR may
 * return from this function before any temporary is allocated.
 */
static bool do_va_helper(DisasContext *ctx, arg_VA *a,
    void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
    TCGv_ptr dst, src_a, src_b, src_c;

    REQUIRE_VECTOR(ctx);

    dst = gen_avr_ptr(a->vrt);
    src_a = gen_avr_ptr(a->vra);
    src_b = gen_avr_ptr(a->vrb);
    src_c = gen_avr_ptr(a->rc);

    gen_helper(dst, src_a, src_b, src_c);

    tcg_temp_free_ptr(dst);
    tcg_temp_free_ptr(src_a);
    tcg_temp_free_ptr(src_b);
    tcg_temp_free_ptr(src_c);

    return true;
}

TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)

/*
 * Translate VSEL as a single 16-byte gvec bit-select, with no helper call.
 *
 * The register in a->rc is passed as the first source operand of
 * tcg_gen_gvec_bitsel, followed by vrb and then vra.
 * NOTE(review): this assumes the usual gvec bitsel contract (first source
 * is the selector; set bits take the second source, clear bits the third),
 * i.e. vrt = (vrb & rc) | (vra & ~rc) -- confirm against tcg-op-gvec.h.
 */
static bool trans_VSEL(DisasContext *ctx, arg_VA *a)
{
    REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_bitsel(MO_64,
                        avr_full_offset(a->vrt),
                        avr_full_offset(a->rc),
                        avr_full_offset(a->vrb),
                        avr_full_offset(a->vra),
                        16, 16);
    return true;
}

/*
 * Multiply-sum instructions whose helpers take only the four register
 * pointers; all are gated on the ALTIVEC instruction flag.
 */
TRANS_FLAGS(ALTIVEC, VMSUMUBM, do_va_helper, gen_helper_VMSUMUBM)
TRANS_FLAGS(ALTIVEC, VMSUMMBM, do_va_helper, gen_helper_VMSUMMBM)
TRANS_FLAGS(ALTIVEC, VMSUMSHM, do_va_helper, gen_helper_VMSUMSHM)
TRANS_FLAGS(ALTIVEC, VMSUMUHM, do_va_helper, gen_helper_VMSUMUHM)

/*
 * Common translator for VA-form instructions whose helper additionally
 * needs cpu_env.
 *
 * @ctx:        translation context
 * @a:          decoded operands; vrt is the destination, vra/vrb/rc the
 *              three sources
 * @gen_helper: helper generator, called as (cpu_env, vrt, vra, vrb, rc)
 *
 * Returns true once the helper call has been emitted.  REQUIRE_VECTOR may
 * return from this function before any temporary is allocated.
 */
static bool do_va_env_helper(DisasContext *ctx, arg_VA *a,
    void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
    TCGv_ptr dst, src_a, src_b, src_c;

    REQUIRE_VECTOR(ctx);

    dst = gen_avr_ptr(a->vrt);
    src_a = gen_avr_ptr(a->vra);
    src_b = gen_avr_ptr(a->vrb);
    src_c = gen_avr_ptr(a->rc);

    gen_helper(cpu_env, dst, src_a, src_b, src_c);

    tcg_temp_free_ptr(dst);
    tcg_temp_free_ptr(src_a);
    tcg_temp_free_ptr(src_b);
    tcg_temp_free_ptr(src_c);

    return true;
}

TRANS_FLAGS(ALTIVEC, VMSUMUHS, do_va_env_helper, gen_helper_VMSUMUHS)
TRANS_FLAGS(ALTIVEC, VMSUMSHS, do_va_env_helper, gen_helper_VMSUMSHS)

/*
 * vmaddfp and vnmsubfp share one generated translator
 * (gen_vmaddfp_vnmsubfp); GEN_VAFORM_PAIRED dispatches on the opcode's Rc
 * bit, selecting vnmsubfp when it is set.
 */
GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)

/*
 * Count-leading-zeros and negate translators.  The numeric arguments are
 * passed straight to the GEN_VXFORM_* generator macros, which are defined
 * elsewhere in this file.
 */
GEN_VXFORM_NOA(vclzb, 1, 28)
GEN_VXFORM_NOA(vclzh, 1, 29)
GEN_VXFORM_TRANS(vclzw, 1, 30)
GEN_VXFORM_TRANS(vclzd, 1, 31)
GEN_VXFORM_NOA_2(vnegw, 1, 24, 6)
GEN_VXFORM_NOA_2(vnegd, 1, 24, 7)

/*
 * Per-element expanders used by do_vexts.  In each of them the last
 * argument is the number of high-order bits of the element that get
 * replaced by copies of the sign bit of the remaining low-order field.
 */

/* 64-bit element: sign-extract the low (64 - shift) bits of src. */
static void gen_vexts_i64(TCGv_i64 dst, TCGv_i64 src, int64_t shift)
{
    const unsigned int field_len = 64 - shift;

    tcg_gen_sextract_i64(dst, src, 0, field_len);
}

/* 32-bit element: sign-extract the low (32 - shift) bits of src. */
static void gen_vexts_i32(TCGv_i32 dst, TCGv_i32 src, int32_t shift)
{
    const unsigned int field_len = 32 - shift;

    tcg_gen_sextract_i32(dst, src, 0, field_len);
}

/*
 * Host-vector form: shift each element left by shift, then arithmetic
 * shift right by the same amount.
 */
static void gen_vexts_vec(unsigned vece, TCGv_vec dst, TCGv_vec src,
                          int64_t shift)
{
    tcg_gen_shli_vec(vece, dst, src, shift);
    tcg_gen_sari_vec(vece, dst, dst, shift);
}

/*
 * Common translator for the vector sign-extend instructions (ISA 3.00).
 *
 * @ctx:  translation context
 * @a:    decoded operands; vrb is the source, vrt the destination
 * @vece: element size of the result, MO_32 or MO_64 (it is used to index a
 *        two-entry table, so no other value is valid)
 * @s:    immediate forwarded to the gen_vexts_* expanders
 *
 * Returns true once the 16-byte gvec operation has been emitted.
 */
static bool do_vexts(DisasContext *ctx, arg_VX_tb *a, unsigned vece, int64_t s)
{
    /* Vector opcodes the .fniv expander relies on. */
    static const TCGOpcode shift_ops[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };

    /* Entry 0 handles MO_32 elements, entry 1 handles MO_64 elements. */
    static const GVecGen2i expanders[2] = {
        [0] = {
            .fni4 = gen_vexts_i32,
            .fniv = gen_vexts_vec,
            .opt_opc = shift_ops,
            .vece = MO_32
        },
        [1] = {
            .fni8 = gen_vexts_i64,
            .fniv = gen_vexts_vec,
            .opt_opc = shift_ops,
            .vece = MO_64
        },
    };
    const GVecGen2i *gen;

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    gen = &expanders[vece - MO_32];
    tcg_gen_gvec_2i(avr_full_offset(a->vrt), avr_full_offset(a->vrb),
                    16, 16, s, gen);

    return true;
}

/*
 * Sign-extend instantiations of do_vexts.  The third argument is the result
 * element size and the fourth is the number of high-order bits filled with
 * the sign: 24 and 16 for byte/halfword into a 32-bit word, 56, 48 and 32
 * for byte/halfword/word into a 64-bit doubleword.
 *
 * No trailing ';' here: TRANS() is used without one elsewhere in this file,
 * so a semicolon would only add an empty file-scope declaration.
 */
TRANS(VEXTSB2W, do_vexts, MO_32, 24)
TRANS(VEXTSH2W, do_vexts, MO_32, 16)
TRANS(VEXTSB2D, do_vexts, MO_64, 56)
TRANS(VEXTSH2D, do_vexts, MO_64, 48)
TRANS(VEXTSW2D, do_vexts, MO_64, 32)

/*
 * Translate VEXTSD2Q (ISA 3.10): sign-extend the low doubleword of VRB to
 * a full 128-bit value in VRT.
 *
 * The low doubleword is copied unchanged; the high doubleword becomes that
 * value arithmetically shifted right by 63, i.e. 64 copies of its sign bit.
 */
static bool trans_VEXTSD2Q(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 dword;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    dword = tcg_temp_new_i64();

    /* Low half: straight copy from the source register. */
    get_avr64(dword, a->vrb, false);
    set_avr64(a->vrt, dword, false);

    /* High half: replicate the sign bit of the low half. */
    tcg_gen_sari_i64(dword, dword, 63);
    set_avr64(a->vrt, dword, true);

    tcg_temp_free_i64(dword);

    return true;
}

/*
 * Count-trailing-zeros, population-count, bit-permute, gather-bits and
 * polynomial multiply-sum translators, generated by the GEN_VXFORM_*
 * macros defined elsewhere in this file.
 *
 * vpopcnt{b,h,w,d} are given the same opcode numbers (1, 28..31) as
 * vclz{b,h,w,d}; the GEN_VXFORM_DUAL lines combine each such pair, with
 * both members requiring PPC2_ALTIVEC_207.
 *
 * The generator macros are invoked without a trailing ';' throughout, as
 * they are elsewhere in this file.
 */
GEN_VXFORM_NOA_2(vctzb, 1, 24, 28)
GEN_VXFORM_NOA_2(vctzh, 1, 24, 29)
GEN_VXFORM_NOA_2(vctzw, 1, 24, 30)
GEN_VXFORM_NOA_2(vctzd, 1, 24, 31)
GEN_VXFORM_NOA_3(vclzlsbb, 1, 24, 0)
GEN_VXFORM_NOA_3(vctzlsbb, 1, 24, 1)
GEN_VXFORM_NOA(vpopcntb, 1, 28)
GEN_VXFORM_NOA(vpopcnth, 1, 29)
GEN_VXFORM_NOA(vpopcntw, 1, 30)
GEN_VXFORM_NOA(vpopcntd, 1, 31)
GEN_VXFORM_DUAL(vclzb, PPC_NONE, PPC2_ALTIVEC_207, \
                vpopcntb, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vclzh, PPC_NONE, PPC2_ALTIVEC_207, \
                vpopcnth, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vclzw, PPC_NONE, PPC2_ALTIVEC_207, \
                vpopcntw, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
                vpopcntd, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM(vbpermd, 6, 23)
GEN_VXFORM(vbpermq, 6, 21)
GEN_VXFORM_TRANS(vgbbd, 6, 20)
GEN_VXFORM(vpmsumb, 4, 16)
GEN_VXFORM(vpmsumh, 4, 17)
GEN_VXFORM(vpmsumw, 4, 18)
GEN_VXFORM(vpmsumd, 4, 19)

/*
 * GEN_BCD(op)
 *
 * Defines gen_<op>() for a BCD instruction with two vector sources (rA,
 * rB) and one vector destination (rD).  The helper also receives a 0/1
 * constant taken from opcode bit 0x200 and writes its result into
 * cpu_crf[6].
 *
 * With AltiVec disabled, only POWERPC_EXCP_VPU is generated.
 */
#define GEN_BCD(op)                                 \
static void gen_##op(DisasContext *ctx)             \
{                                                   \
    TCGv_ptr src_a, src_b, dst;                     \
    TCGv_i32 ps_flag;                               \
                                                    \
    if (unlikely(!ctx->altivec_enabled)) {          \
        gen_exception(ctx, POWERPC_EXCP_VPU);       \
        return;                                     \
    }                                               \
                                                    \
    src_a = gen_avr_ptr(rA(ctx->opcode));           \
    src_b = gen_avr_ptr(rB(ctx->opcode));           \
    dst = gen_avr_ptr(rD(ctx->opcode));             \
                                                    \
    ps_flag = tcg_const_i32(!!(ctx->opcode & 0x200)); \
                                                    \
    gen_helper_##op(cpu_crf[6], dst, src_a, src_b, ps_flag); \
                                                    \
    tcg_temp_free_ptr(src_a);                       \
    tcg_temp_free_ptr(src_b);                       \
    tcg_temp_free_ptr(dst);                         \
    tcg_temp_free_i32(ps_flag);                     \
}

/*
 * GEN_BCD2(op)
 *
 * Single-source variant of GEN_BCD: defines gen_<op>() for a BCD
 * instruction with one vector source (rB) and one vector destination
 * (rD).  The helper also receives a 0/1 constant taken from opcode bit
 * 0x200 and writes its result into cpu_crf[6].
 *
 * With AltiVec disabled, only POWERPC_EXCP_VPU is generated.
 */
#define GEN_BCD2(op)                                \
static void gen_##op(DisasContext *ctx)             \
{                                                   \
    TCGv_ptr src_b, dst;                            \
    TCGv_i32 ps_flag;                               \
                                                    \
    if (unlikely(!ctx->altivec_enabled)) {          \
        gen_exception(ctx, POWERPC_EXCP_VPU);       \
        return;                                     \
    }                                               \
                                                    \
    src_b = gen_avr_ptr(rB(ctx->opcode));           \
    dst = gen_avr_ptr(rD(ctx->opcode));             \
                                                    \
    ps_flag = tcg_const_i32(!!(ctx->opcode & 0x200)); \
                                                    \
    gen_helper_##op(cpu_crf[6], dst, src_b, ps_flag); \
                                                    \
    tcg_temp_free_ptr(src_b);                       \
    tcg_temp_free_ptr(dst);                         \
    tcg_temp_free_i32(ps_flag);                     \
}

/*
 * BCD translator instantiations: GEN_BCD for the two-source forms, GEN_BCD2
 * for the single-source forms.  Both macros expand to a complete function
 * definition, so none of the invocations takes a trailing ';'.
 */
GEN_BCD(bcdadd)
GEN_BCD(bcdsub)
GEN_BCD2(bcdcfn)
GEN_BCD2(bcdctn)
GEN_BCD2(bcdcfz)
GEN_BCD2(bcdctz)
GEN_BCD2(bcdcfsq)
GEN_BCD2(bcdctsq)
GEN_BCD2(bcdsetsgn)
GEN_BCD(bcdcpsgn)
GEN_BCD(bcds)
GEN_BCD(bcdus)
GEN_BCD(bcdsr)
GEN_BCD(bcdtrunc)
GEN_BCD(bcdutrunc)

/*
 * Expanded-opcode dispatcher: choose a single-source BCD translator from
 * the opc4 field of the opcode.  Values without an entry below are treated
 * as invalid instructions.
 */
static void gen_xpnd04_1(DisasContext *ctx)
{
    switch (opc4(ctx->opcode)) {
    case 0:
        gen_bcdctsq(ctx);
        return;
    case 2:
        gen_bcdcfsq(ctx);
        return;
    case 4:
        gen_bcdctz(ctx);
        return;
    case 5:
        gen_bcdctn(ctx);
        return;
    case 6:
        gen_bcdcfz(ctx);
        return;
    case 7:
        gen_bcdcfn(ctx);
        return;
    case 31:
        gen_bcdsetsgn(ctx);
        return;
    default:
        break;
    }

    gen_invalid(ctx);
}

/*
 * Expanded-opcode dispatcher: same selection by opc4 as gen_xpnd04_1,
 * except that value 5 (bcdctn) has no entry here and therefore decodes as
 * an invalid instruction, like every other unlisted value.
 */
static void gen_xpnd04_2(DisasContext *ctx)
{
    switch (opc4(ctx->opcode)) {
    case 0:
        gen_bcdctsq(ctx);
        return;
    case 2:
        gen_bcdcfsq(ctx);
        return;
    case 4:
        gen_bcdctz(ctx);
        return;
    case 6:
        gen_bcdcfz(ctx);
        return;
    case 7:
        gen_bcdcfn(ctx);
        return;
    case 31:
        gen_bcdsetsgn(ctx);
        return;
    default:
        break;
    }

    gen_invalid(ctx);
}


/*
 * Pair each vector subtract/add translator with a BCD translator (or with
 * one of the xpnd04 dispatchers).  Every name is followed by its own two
 * instruction-flag arguments.
 * NOTE(review): presumably GEN_VXFORM_DUAL builds one handler per pair and
 * picks between the two at translation time -- confirm against the macro
 * definition, which is elsewhere in this file.
 *
 * bcdadd, bcdsub and bcdtrunc each appear as the second member of two
 * different pairs.
 */
GEN_VXFORM_DUAL(vsubcuw, PPC_ALTIVEC, PPC_NONE, \
                xpnd04_1, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \
                xpnd04_2, PPC_NONE, PPC2_ISA300)

GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \
                bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \
                bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \
                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \
                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vaddshs, PPC_ALTIVEC, PPC_NONE, \
                bcdcpsgn, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_DUAL(vsubudm, PPC2_ALTIVEC_207, PPC_NONE, \
                bcds, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_DUAL(vsubuwm, PPC_ALTIVEC, PPC_NONE, \
                bcdus, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_DUAL(vsubsbs, PPC_ALTIVEC, PPC_NONE, \
                bcdtrunc, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_DUAL(vsubuqm, PPC2_ALTIVEC_207, PPC_NONE, \
                bcdtrunc, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_DUAL(vsubcuq, PPC2_ALTIVEC_207, PPC_NONE, \
                bcdutrunc, PPC_NONE, PPC2_ISA300)


/*
 * Translate vsbox: call the vsbox helper with pointers to the destination
 * (rD) and source (rA) vector registers.  With AltiVec disabled, only
 * POWERPC_EXCP_VPU is generated.
 */
static void gen_vsbox(DisasContext *ctx)
{
    TCGv_ptr src, dst;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    src = gen_avr_ptr(rA(ctx->opcode));
    dst = gen_avr_ptr(rD(ctx->opcode));

    gen_helper_vsbox(dst, src);

    tcg_temp_free_ptr(src);
    tcg_temp_free_ptr(dst);
}

/*
 * Cipher translators.  vcipher/vcipherlast are given identical opcode
 * numbers (4, 20), as are vncipher/vncipherlast (4, 21); each such pair is
 * then combined by GEN_VXFORM_DUAL, with both members requiring
 * PPC2_ALTIVEC_207.
 */
GEN_VXFORM(vcipher, 4, 20)
GEN_VXFORM(vcipherlast, 4, 20)
GEN_VXFORM(vncipher, 4, 21)
GEN_VXFORM(vncipherlast, 4, 21)

GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207,
                vcipherlast, PPC_NONE, PPC2_ALTIVEC_207)
GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207,
                vncipherlast, PPC_NONE, PPC2_ALTIVEC_207)

/*
 * VSHASIGMA(op)
 *
 * Defines gen_<op>() for an instruction with one vector source (rA) and
 * one vector destination (rD).  The value of the opcode's rB field is not
 * used as a register number here: it is handed to the helper as a 32-bit
 * constant.
 *
 * With AltiVec disabled, only POWERPC_EXCP_VPU is generated.
 */
#define VSHASIGMA(op)                         \
static void gen_##op(DisasContext *ctx)       \
{                                             \
    TCGv_ptr src, dst;                        \
    TCGv_i32 imm;                             \
                                              \
    if (unlikely(!ctx->altivec_enabled)) {    \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return;                               \
    }                                         \
                                              \
    src = gen_avr_ptr(rA(ctx->opcode));       \
    dst = gen_avr_ptr(rD(ctx->opcode));       \
    imm = tcg_const_i32(rB(ctx->opcode));     \
                                              \
    gen_helper_##op(dst, src, imm);           \
                                              \
    tcg_temp_free_ptr(src);                   \
    tcg_temp_free_ptr(dst);                   \
    tcg_temp_free_i32(imm);                   \
}

VSHASIGMA(vshasigmaw)
VSHASIGMA(vshasigmad)

/*
 * vpermxor translator, then combined with vsldoi via GEN_VXFORM_DUAL:
 * vsldoi requires PPC_ALTIVEC, vpermxor requires PPC2_ALTIVEC_207.
 */
GEN_VXFORM3(vpermxor, 22, 0xFF)
GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE,
                vpermxor, PPC_NONE, PPC2_ALTIVEC_207)

/*
 * Translate VCFUGED (ISA 3.10): apply the CFUGED helper to each 64-bit
 * element of VRA and VRB across the 16-byte register, storing into VRT.
 */
static bool trans_VCFUGED(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3 cfuged_gen = {
        .vece = MO_64,
        .fni8 = gen_helper_CFUGED,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   16, 16, &cfuged_gen);
    return true;
}

/*
 * Translate VCLZDM (ISA 3.10): run do_cntzdm on each 64-bit element of VRA
 * and VRB, storing into VRT.  The immediate passed to do_cntzdm is false
 * here; trans_VCTZDM is the same operation with true.
 */
static bool trans_VCLZDM(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3i cntzdm_gen = {
        .vece = MO_64,
        .fni8 = do_cntzdm,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3i(avr_full_offset(a->vrt),
                    avr_full_offset(a->vra),
                    avr_full_offset(a->vrb),
                    16, 16, false, &cntzdm_gen);
    return true;
}

/*
 * Translate VCTZDM (ISA 3.10): run do_cntzdm on each 64-bit element of VRA
 * and VRB, storing into VRT.  The immediate passed to do_cntzdm is true
 * here; trans_VCLZDM is the same operation with false.
 */
static bool trans_VCTZDM(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3i cntzdm_gen = {
        .vece = MO_64,
        .fni8 = do_cntzdm,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3i(avr_full_offset(a->vrt),
                    avr_full_offset(a->vra),
                    avr_full_offset(a->vrb),
                    16, 16, true, &cntzdm_gen);
    return true;
}

/*
 * Translate VPDEPD (ISA 3.10): apply the PDEPD helper to each 64-bit
 * element of VRA and VRB across the 16-byte register, storing into VRT.
 */
static bool trans_VPDEPD(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3 pdepd_gen = {
        .vece = MO_64,
        .fni8 = gen_helper_PDEPD,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   16, 16, &pdepd_gen);
    return true;
}

/*
 * Translate VPEXTD (ISA 3.10): apply the PEXTD helper to each 64-bit
 * element of VRA and VRB across the 16-byte register, storing into VRT.
 */
static bool trans_VPEXTD(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3 pextd_gen = {
        .vece = MO_64,
        .fni8 = gen_helper_PEXTD,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   16, 16, &pextd_gen);
    return true;
}

/*
 * Translate VMSUMUDM (ISA 3.00) inline, without a helper.
 *
 * A 128-bit accumulator (acc_hi:acc_lo) starts as the contents of the
 * register in a->rc.  For each of the two doublewords, the full unsigned
 * 128-bit product of the VRA and VRB elements is added to it.  The
 * additions are done with add2, so any carry out of bit 127 is dropped.
 * The accumulator is then written to VRT.
 */
static bool trans_VMSUMUDM(DisasContext *ctx, arg_VA *a)
{
    TCGv_i64 acc_lo, acc_hi, mul_lo, mul_hi;
    int half;

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    acc_hi = tcg_temp_new_i64();
    acc_lo = tcg_temp_new_i64();
    mul_lo = tcg_temp_new_i64();
    mul_hi = tcg_temp_new_i64();

    /* Seed the accumulator with the third source operand. */
    get_avr64(acc_lo, a->rc, false);
    get_avr64(acc_hi, a->rc, true);

    /* half == 0 selects the low doubleword, half == 1 the high one. */
    for (half = 0; half < 2; half++) {
        get_avr64(mul_lo, a->vra, half);
        get_avr64(mul_hi, a->vrb, half);
        /* The operand temporaries are reused for the 128-bit product. */
        tcg_gen_mulu2_i64(mul_lo, mul_hi, mul_lo, mul_hi);
        tcg_gen_add2_i64(acc_lo, acc_hi, acc_lo, acc_hi, mul_lo, mul_hi);
    }

    set_avr64(a->vrt, acc_lo, false);
    set_avr64(a->vrt, acc_hi, true);

    tcg_temp_free_i64(acc_lo);
    tcg_temp_free_i64(acc_hi);
    tcg_temp_free_i64(mul_lo);
    tcg_temp_free_i64(mul_hi);

    return true;
}

/*
 * Translate VMSUMCUD (ISA 3.10) inline, without a helper.
 *
 * The two unsigned 64x64->128-bit products of the VRA/VRB doublewords and
 * the 128-bit value in the register a->rc are summed; only the part of
 * that sum above bit 127 (the carry out) is kept.  It is stored in the low
 * doubleword of VRT and the high doubleword is zeroed.
 *
 * The sum is formed 64 bits at a time with add2: tmp1 holds the current
 * 64-bit partial sum and tmp0 collects the carries out of it.
 */
static bool trans_VMSUMCUD(DisasContext *ctx, arg_VA *a)
{
    TCGv_i64 tmp0, tmp1, prod1h, prod1l, prod0h, prod0l, zero;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tmp0 = tcg_temp_new_i64();
    tmp1 = tcg_temp_new_i64();
    prod1h = tcg_temp_new_i64();
    prod1l = tcg_temp_new_i64();
    prod0h = tcg_temp_new_i64();
    prod0l = tcg_temp_new_i64();
    /* Obtained with tcg_constant_i64, and not freed at the end. */
    zero = tcg_constant_i64(0);

    /* prod1 = vsr[vra+32].dw[1] * vsr[vrb+32].dw[1] */
    get_avr64(tmp0, a->vra, false);
    get_avr64(tmp1, a->vrb, false);
    tcg_gen_mulu2_i64(prod1l, prod1h, tmp0, tmp1);

    /* prod0 = vsr[vra+32].dw[0] * vsr[vrb+32].dw[0] */
    get_avr64(tmp0, a->vra, true);
    get_avr64(tmp1, a->vrb, true);
    tcg_gen_mulu2_i64(prod0l, prod0h, tmp0, tmp1);

    /* Sum lower 64-bits elements */
    get_avr64(tmp1, a->rc, false);
    /* After these two additions tmp0 is the carry count out of bit 63. */
    tcg_gen_add2_i64(tmp1, tmp0, tmp1, zero, prod1l, zero);
    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0l, zero);

    /*
     * Discard lower 64-bits, leaving the carry into bit 64.
     * Then sum the higher 64-bit elements.
     */
    get_avr64(tmp1, a->rc, true);
    /* The previous carry count (tmp0) is the low input of this addition. */
    tcg_gen_add2_i64(tmp1, tmp0, tmp0, zero, tmp1, zero);
    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod1h, zero);
    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0h, zero);

    /* Discard 64 more bits to complete the CHOP128(temp >> 128) */
    set_avr64(a->vrt, tmp0, false);
    set_avr64(a->vrt, zero, true);

    tcg_temp_free_i64(tmp0);
    tcg_temp_free_i64(tmp1);
    tcg_temp_free_i64(prod1h);
    tcg_temp_free_i64(prod1l);
    tcg_temp_free_i64(prod0h);
    tcg_temp_free_i64(prod0l);

    return true;
}

/*
 * Common translator for VX-form instructions implemented by a helper that
 * takes only register pointers.
 *
 * @ctx:        translation context
 * @a:          decoded operands; vrt is the destination, vra/vrb the
 *              sources
 * @gen_helper: helper generator, called as (vrt, vra, vrb)
 *
 * Returns true once the helper call has been emitted.  REQUIRE_VECTOR may
 * return from this function before any temporary is allocated.
 */
static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
                         void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
    TCGv_ptr src_a, src_b, dst;

    REQUIRE_VECTOR(ctx);

    src_a = gen_avr_ptr(a->vra);
    src_b = gen_avr_ptr(a->vrb);
    dst = gen_avr_ptr(a->vrt);

    gen_helper(dst, src_a, src_b);

    tcg_temp_free_ptr(src_a);
    tcg_temp_free_ptr(src_b);
    tcg_temp_free_ptr(dst);

    return true;
}

/*
 * Common translator for the doubleword multiply even/odd instructions.
 *
 * @ctx:     translation context
 * @a:       decoded operands
 * @even:    passed as the "high half" selector of get_avr64: true
 *           multiplies the high doublewords of VRA and VRB, false the low
 *           doublewords
 * @gen_mul: widening multiply generator, called as
 *           (low_result, high_result, operand_a, operand_b)
 *
 * The two result halves are stored to the low and high doublewords of VRT.
 * Returns true once the code has been emitted.
 */
static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even,
                         void (*gen_mul)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 op_a, op_b, res_lo, res_hi;

    REQUIRE_VECTOR(ctx);

    op_a = tcg_temp_new_i64();
    op_b = tcg_temp_new_i64();
    res_lo = tcg_temp_new_i64();
    res_hi = tcg_temp_new_i64();

    get_avr64(op_a, a->vra, even);
    get_avr64(op_b, a->vrb, even);

    gen_mul(res_lo, res_hi, op_a, op_b);

    set_avr64(a->vrt, res_lo, false);
    set_avr64(a->vrt, res_hi, true);

    tcg_temp_free_i64(op_a);
    tcg_temp_free_i64(op_b);
    tcg_temp_free_i64(res_lo);
    tcg_temp_free_i64(res_hi);

    return true;
}

/*
 * Translate VMULLD (ISA 3.10) as a single 16-byte gvec multiply on 64-bit
 * elements: VRT = VRA * VRB, element by element.
 */
static bool trans_VMULLD(DisasContext *ctx, arg_VX *a)
{
    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_mul(MO_64,
                     avr_full_offset(a->vrt),
                     avr_full_offset(a->vra),
                     avr_full_offset(a->vrb),
                     16, 16);
    return true;
}

/*
 * Multiply even/odd instructions.
 *
 * The byte and halfword forms (ALTIVEC) and the word forms (ALTIVEC_207)
 * call out-of-line helpers through do_vx_helper.  The doubleword forms
 * (ISA310) are generated inline by do_vx_vmuleo: the boolean is true for
 * the "even" instructions and false for the "odd" ones, and the last
 * argument selects a signed (muls2) or unsigned (mulu2) widening multiply.
 */
TRANS_FLAGS(ALTIVEC, VMULESB, do_vx_helper, gen_helper_VMULESB)
TRANS_FLAGS(ALTIVEC, VMULOSB, do_vx_helper, gen_helper_VMULOSB)
TRANS_FLAGS(ALTIVEC, VMULEUB, do_vx_helper, gen_helper_VMULEUB)
TRANS_FLAGS(ALTIVEC, VMULOUB, do_vx_helper, gen_helper_VMULOUB)
TRANS_FLAGS(ALTIVEC, VMULESH, do_vx_helper, gen_helper_VMULESH)
TRANS_FLAGS(ALTIVEC, VMULOSH, do_vx_helper, gen_helper_VMULOSH)
TRANS_FLAGS(ALTIVEC, VMULEUH, do_vx_helper, gen_helper_VMULEUH)
TRANS_FLAGS(ALTIVEC, VMULOUH, do_vx_helper, gen_helper_VMULOUH)
TRANS_FLAGS2(ALTIVEC_207, VMULESW, do_vx_helper, gen_helper_VMULESW)
TRANS_FLAGS2(ALTIVEC_207, VMULOSW, do_vx_helper, gen_helper_VMULOSW)
TRANS_FLAGS2(ALTIVEC_207, VMULEUW, do_vx_helper, gen_helper_VMULEUW)
TRANS_FLAGS2(ALTIVEC_207, VMULOUW, do_vx_helper, gen_helper_VMULOUW)
TRANS_FLAGS2(ISA310, VMULESD, do_vx_vmuleo, true , tcg_gen_muls2_i64)
TRANS_FLAGS2(ISA310, VMULOSD, do_vx_vmuleo, false, tcg_gen_muls2_i64)
TRANS_FLAGS2(ISA310, VMULEUD, do_vx_vmuleo, true , tcg_gen_mulu2_i64)
TRANS_FLAGS2(ISA310, VMULOUD, do_vx_vmuleo, false, tcg_gen_mulu2_i64)

/*
 * Multiply-high on the two 32-bit words packed in a 64-bit value.
 *
 * @t:    result; its upper word receives the high 32 bits of
 *        (upper word of a) * (upper word of b), its lower word the high 32
 *        bits of (lower word of a) * (lower word of b)
 * @a:    first operand
 * @b:    second operand
 * @sign: true to treat the words as signed, false as unsigned
 */
static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
{
    TCGv_i64 upper = tcg_temp_new_i64();
    TCGv_i64 lower = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();

    /* Full 64-bit product of the two lower words. */
    if (!sign) {
        tcg_gen_ext32u_i64(lower, a);
        tcg_gen_ext32u_i64(rhs, b);
    } else {
        tcg_gen_ext32s_i64(lower, a);
        tcg_gen_ext32s_i64(rhs, b);
    }
    tcg_gen_mul_i64(lower, lower, rhs);

    /* Full 64-bit product of the two upper words. */
    if (!sign) {
        tcg_gen_shri_i64(upper, a, 32);
        tcg_gen_shri_i64(rhs, b, 32);
    } else {
        tcg_gen_sari_i64(upper, a, 32);
        tcg_gen_sari_i64(rhs, b, 32);
    }
    tcg_gen_mul_i64(upper, upper, rhs);

    /*
     * Keep bits 63:32 of the upper product in place and drop the high half
     * of the lower product into bits 31:0.
     */
    tcg_gen_shri_i64(lower, lower, 32);
    tcg_gen_deposit_i64(t, upper, lower, 0, 32);

    tcg_temp_free_i64(upper);
    tcg_temp_free_i64(lower);
    tcg_temp_free_i64(rhs);
}

/*
 * Multiply-high on 64-bit values: t receives the upper 64 bits of the
 * 128-bit product a * b, signed when @sign is true and unsigned otherwise.
 * The lower 64 bits of the product are computed into a scratch temporary
 * and thrown away.
 */
static void do_vx_vmulhd_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
{
    TCGv_i64 unused_low = tcg_temp_new_i64();

    if (!sign) {
        tcg_gen_mulu2_i64(unused_low, t, a, b);
    } else {
        tcg_gen_muls2_i64(unused_low, t, a, b);
    }

    tcg_temp_free_i64(unused_low);
}

/*
 * Common translator for the vector multiply-high instructions (ISA 3.10).
 *
 * @ctx:  translation context
 * @a:    decoded operands; vra/vrb are the sources, vrt the destination
 * @sign: true for the signed variants, false for the unsigned ones
 * @func: per-doubleword expander, called as (result, a, b, sign).  It must
 *        write every bit of its result operand; both expanders used below
 *        do.
 *
 * Each 64-bit half of the registers is processed independently.  Returns
 * true once the code has been emitted.
 */
static bool do_vx_mulh(DisasContext *ctx, arg_VX *a, bool sign,
                       void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, bool))
{
    TCGv_i64 vra, vrb, vrt;
    int i;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    vra = tcg_temp_new_i64();
    vrb = tcg_temp_new_i64();
    vrt = tcg_temp_new_i64();

    /* i == 0 handles the low doubleword, i == 1 the high doubleword. */
    for (i = 0; i < 2; i++) {
        get_avr64(vra, a->vra, i);
        get_avr64(vrb, a->vrb, i);

        /*
         * vrt is a pure output of func, so the old contents of the
         * destination register are not loaded first.
         */
        func(vrt, vra, vrb, sign);

        set_avr64(a->vrt, vrt, i);
    }

    tcg_temp_free_i64(vra);
    tcg_temp_free_i64(vrb);
    tcg_temp_free_i64(vrt);

    return true;
}

TRANS(VMULHSW, do_vx_mulh, true , do_vx_vmulhw_i64)
TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)

/*
 * Remove the translator-generator macros defined in this file.
 * NOTE(review): presumably so that they do not leak into whatever includes
 * this .c.inc -- confirm against the including file.
 */
#undef GEN_VR_LDX
#undef GEN_VR_STX
#undef GEN_VR_LVE
#undef GEN_VR_STVE

#undef GEN_VX_LOGICAL
#undef GEN_VX_LOGICAL_207
#undef GEN_VXFORM
#undef GEN_VXFORM_207
#undef GEN_VXFORM_DUAL
#undef GEN_VXRFORM_DUAL
#undef GEN_VXRFORM1
#undef GEN_VXRFORM
#undef GEN_VXFORM_VSPLTI
#undef GEN_VXFORM_NOA
#undef GEN_VXFORM_UIMM
#undef GEN_VAFORM_PAIRED

#undef GEN_BCD2
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								/*
 								 * translate/vmx-impl.c
 								 *
 								 * Altivec/VMX translation
 								 */
 								/***                      Altivec vector extension                         ***/
 								/* Altivec registers moves */
 								static inline TCGv_ptr gen_avr_ptr(int reg)
 								{
 								    TCGv_ptr r = tcg_temp_new_ptr();
-												target/ppc: introduce avr_full_offset() function

All TCG vector operations require pointers to the base address of the vector
rather than separate access to the top and bottom 64-bits. Convert the VMX TCG
instructions to use a new avr_full_offset() function instead of avr64_offset()
which can then itself be written as a simple wrapper onto vsr_full_offset().

This same function can also reused in cpu_avr_ptr() to avoid having more than
one copy of the offset calculation logic.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-Id: <20190307180520.13868-5-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-03-07 21:05:17 +03:00
+								    tcg_gen_addi_ptr(r, cpu_env, avr_full_offset(reg));
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    return r;
 								}
 								#define GEN_VR_LDX(name, opc2, opc3)                                          \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								static void glue(gen_, name)(DisasContext *ctx)                               \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								{                                                                             \
 								    TCGv EA;                                                                  \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    TCGv_i64 avr;                                                             \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    if (unlikely(!ctx->altivec_enabled)) {                                    \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                                 \
 								        return;                                                               \
 								    }                                                                         \
 								    gen_set_access_type(ctx, ACCESS_INT);                                     \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    avr = tcg_temp_new_i64();                                                 \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    EA = tcg_temp_new();                                                      \
 								    gen_addr_reg_index(ctx, EA);                                              \
 								    tcg_gen_andi_tl(EA, EA, ~0xf);                                            \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								    /*                                                                        \
 								     * We only need to swap high and low halves. gen_qemu_ld64_i64            \
 								     * does necessary 64-bit byteswap already.                                \
 								     */                                                                       \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    if (ctx->le_mode) {                                                       \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        gen_qemu_ld64_i64(ctx, avr, EA);                                      \
 								        set_avr64(rD(ctx->opcode), avr, false);                               \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								        tcg_gen_addi_tl(EA, EA, 8);                                           \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        gen_qemu_ld64_i64(ctx, avr, EA);                                      \
 								        set_avr64(rD(ctx->opcode), avr, true);                                \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    } else {                                                                  \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        gen_qemu_ld64_i64(ctx, avr, EA);                                      \
 								        set_avr64(rD(ctx->opcode), avr, true);                                \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								        tcg_gen_addi_tl(EA, EA, 8);                                           \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        gen_qemu_ld64_i64(ctx, avr, EA);                                      \
 								        set_avr64(rD(ctx->opcode), avr, false);                               \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    }                                                                         \
 								    tcg_temp_free(EA);                                                        \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    tcg_temp_free_i64(avr);                                                   \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								}
 								#define GEN_VR_STX(name, opc2, opc3)                                          \
 								static void gen_st##name(DisasContext *ctx)                                   \
 								{                                                                             \
 								    TCGv EA;                                                                  \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    TCGv_i64 avr;                                                             \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    if (unlikely(!ctx->altivec_enabled)) {                                    \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                                 \
 								        return;                                                               \
 								    }                                                                         \
 								    gen_set_access_type(ctx, ACCESS_INT);                                     \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    avr = tcg_temp_new_i64();                                                 \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    EA = tcg_temp_new();                                                      \
 								    gen_addr_reg_index(ctx, EA);                                              \
 								    tcg_gen_andi_tl(EA, EA, ~0xf);                                            \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								    /*                                                                        \
 								     * We only need to swap high and low halves. gen_qemu_st64_i64            \
 								     * does necessary 64-bit byteswap already.                                \
 								     */                                                                       \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    if (ctx->le_mode) {                                                       \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rD(ctx->opcode), false);                               \
 								        gen_qemu_st64_i64(ctx, avr, EA);                                      \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								        tcg_gen_addi_tl(EA, EA, 8);                                           \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rD(ctx->opcode), true);                                \
 								        gen_qemu_st64_i64(ctx, avr, EA);                                      \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    } else {                                                                  \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rD(ctx->opcode), true);                                \
 								        gen_qemu_st64_i64(ctx, avr, EA);                                      \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								        tcg_gen_addi_tl(EA, EA, 8);                                           \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rD(ctx->opcode), false);                               \
 								        gen_qemu_st64_i64(ctx, avr, EA);                                      \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    }                                                                         \
 								    tcg_temp_free(EA);                                                        \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    tcg_temp_free_i64(avr);                                                   \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								}
 								/*
 								 * GEN_VR_LVE - define gen_lve<name>(), the translator for one vector
 								 * load-element instruction.
 								 *
 								 * name: suffix pasted onto both gen_lve and gen_helper_lve.
 								 * opc2, opc3: not referenced by the macro body.
 								 * size: constant used to mask the effective address; the address is
 								 *       ANDed with ~(size - 1) whenever size is greater than 1.
 								 *
 								 * The generated function raises POWERPC_EXCP_VPU and emits nothing else
 								 * when ctx->altivec_enabled is clear. Otherwise it computes the indexed
 								 * effective address, masks it, and calls the helper with a pointer to
 								 * the vector register selected by rS(ctx->opcode). Both temporaries are
 								 * freed before returning.
 								 */
 								#define GEN_VR_LVE(name, opc2, opc3, size)                              \
 								static void gen_lve##name(DisasContext *ctx)                            \
 								    {                                                                   \
 								        TCGv EA;                                                        \
 								        TCGv_ptr rs;                                                    \
 								        if (unlikely(!ctx->altivec_enabled)) {                          \
 								            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
 								            return;                                                     \
 								        }                                                               \
 								        gen_set_access_type(ctx, ACCESS_INT);                           \
 								        EA = tcg_temp_new();                                            \
 								        gen_addr_reg_index(ctx, EA);                                    \
 								        /* Align EA down for power-of-two element sizes. */             \
 								        if ((size) > 1) {                                               \
 								            tcg_gen_andi_tl(EA, EA, ~((size) - 1));                     \
 								        }                                                               \
 								        rs = gen_avr_ptr(rS(ctx->opcode));                              \
 								        gen_helper_lve##name(cpu_env, rs, EA);                          \
 								        tcg_temp_free(EA);                                              \
 								        tcg_temp_free_ptr(rs);                                          \
 								    }
 								/*
 								 * GEN_VR_STVE - define gen_stve<name>(), the translator for one vector
 								 * store-element instruction.
 								 *
 								 * name: suffix pasted onto both gen_stve and gen_helper_stve.
 								 * opc2, opc3: not referenced by the macro body.
 								 * size: constant used to mask the effective address; the address is
 								 *       ANDed with ~(size - 1) whenever size is greater than 1.
 								 *
 								 * The generated function raises POWERPC_EXCP_VPU and emits nothing else
 								 * when ctx->altivec_enabled is clear. Otherwise it computes the indexed
 								 * effective address, masks it, and calls the helper with a pointer to
 								 * the vector register selected by rS(ctx->opcode). Both temporaries are
 								 * freed before returning.
 								 */
 								#define GEN_VR_STVE(name, opc2, opc3, size)                             \
 								static void gen_stve##name(DisasContext *ctx)                           \
 								    {                                                                   \
 								        TCGv EA;                                                        \
 								        TCGv_ptr rs;                                                    \
 								        if (unlikely(!ctx->altivec_enabled)) {                          \
 								            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
 								            return;                                                     \
 								        }                                                               \
 								        gen_set_access_type(ctx, ACCESS_INT);                           \
 								        EA = tcg_temp_new();                                            \
 								        gen_addr_reg_index(ctx, EA);                                    \
 								        /* Align EA down for power-of-two element sizes. */             \
 								        if ((size) > 1) {                                               \
 								            tcg_gen_andi_tl(EA, EA, ~((size) - 1));                     \
 								        }                                                               \
 								        rs = gen_avr_ptr(rS(ctx->opcode));                              \
 								        gen_helper_stve##name(cpu_env, rs, EA);                         \
 								        tcg_temp_free(EA);                                              \
 								        tcg_temp_free_ptr(rs);                                          \
 								    }
 								/* Whole-vector load translators: defines gen_lvx and gen_lvxl. */
 								GEN_VR_LDX(lvx, 0x07, 0x03);
 								/* As we don't emulate the cache, lvxl is strictly equivalent to lvx */
 								GEN_VR_LDX(lvxl, 0x07, 0x0B);
 								/* Element loads: gen_lvebx, gen_lvehx, gen_lvewx (size 1, 2, 4). */
 								GEN_VR_LVE(bx, 0x07, 0x00, 1);
 								GEN_VR_LVE(hx, 0x07, 0x01, 2);
 								GEN_VR_LVE(wx, 0x07, 0x02, 4);
 								/*
 								 * GEN_VR_STX pastes "gen_st" in front of its name argument, so the
 								 * "svx"/"svxl" arguments below define gen_stsvx and gen_stsvxl.
 								 */
 								GEN_VR_STX(svx, 0x07, 0x07);
 								/* As we don't emulate the cache, stvxl is strictly equivalent to stvx */
 								GEN_VR_STX(svxl, 0x07, 0x0F);
 								/* Element stores: gen_stvebx, gen_stvehx, gen_stvewx (size 1, 2, 4). */
 								GEN_VR_STVE(bx, 0x07, 0x04, 1);
 								GEN_VR_STVE(hx, 0x07, 0x05, 2);
 								GEN_VR_STVE(wx, 0x07, 0x06, 4);
 								static void gen_mfvscr(DisasContext *ctx)
 								{
 								    TCGv_i32 t;
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    TCGv_i64 avr;
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    if (unlikely(!ctx->altivec_enabled)) {
 								        gen_exception(ctx, POWERPC_EXCP_VPU);
 								        return;
 								    }
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    avr = tcg_temp_new_i64();
 								    tcg_gen_movi_i64(avr, 0);
 								    set_avr64(rD(ctx->opcode), avr, true);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    t = tcg_temp_new_i32();
-												target/ppc: Add helper_mfvscr

This is required before changing the representation of the register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-13-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:53 +03:00
+								    gen_helper_mfvscr(t, cpu_env);
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    tcg_gen_extu_i32_i64(avr, t);
 								    set_avr64(rD(ctx->opcode), avr, false);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    tcg_temp_free_i32(t);
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    tcg_temp_free_i64(avr);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								}
 								static void gen_mtvscr(DisasContext *ctx)
 								{
-												target/ppc: Pass integer to helper_mtvscr

We can re-use this helper elsewhere if we're not passing
in an entire vector register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-10-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:50 +03:00
+								    TCGv_i32 val;
 								    int bofs;
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    if (unlikely(!ctx->altivec_enabled)) {
 								        gen_exception(ctx, POWERPC_EXCP_VPU);
 								        return;
 								    }
-												target/ppc: Pass integer to helper_mtvscr

We can re-use this helper elsewhere if we're not passing
in an entire vector register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-10-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:50 +03:00
 								    val = tcg_temp_new_i32();
-												target/ppc: introduce avr_full_offset() function

All TCG vector operations require pointers to the base address of the vector
rather than separate access to the top and bottom 64-bits. Convert the VMX TCG
instructions to use a new avr_full_offset() function instead of avr64_offset()
which can then itself be written as a simple wrapper onto vsr_full_offset().

This same function can also be reused in cpu_avr_ptr() to avoid having more than
one copy of the offset calculation logic.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-Id: <20190307180520.13868-5-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-03-07 21:05:17 +03:00
+								    bofs = avr_full_offset(rB(ctx->opcode));
-												Replace config-time define HOST_WORDS_BIGENDIAN

Replace a config-time define with a compile time condition
define (compatible with clang and gcc) that must be declared prior to
its usage. This avoids having a global configure time define, but also
prevents from bad usage, if the config header wasn't included before.

This can help to make some code independent from qemu too.

gcc supports __BYTE_ORDER__ from about 4.6 and clang from 3.2.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[ For the s390x parts I'm involved in ]
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220323155743.1585078-7-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

											
										
										
											2022-03-23 18:57:17 +03:00
+								#if HOST_BIG_ENDIAN
-												target/ppc: Pass integer to helper_mtvscr

We can re-use this helper elsewhere if we're not passing
in an entire vector register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-10-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:50 +03:00
+								    bofs += 3 * 4;
 								#endif
 								    tcg_gen_ld_i32(val, cpu_env, bofs);
 								    gen_helper_mtvscr(cpu_env, val);
 								    tcg_temp_free_i32(val);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								}
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								#define GEN_VX_VMUL10(name, add_cin, ret_carry)                         \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    TCGv_i64 t0;                                                        \
 								    TCGv_i64 t1;                                                        \
 								    TCGv_i64 t2;                                                        \
 								    TCGv_i64 avr;                                                       \
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								    TCGv_i64 ten, z;                                                    \
 								                                                                        \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								                                                                        \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    t0 = tcg_temp_new_i64();                                            \
 								    t1 = tcg_temp_new_i64();                                            \
 								    t2 = tcg_temp_new_i64();                                            \
 								    avr = tcg_temp_new_i64();                                           \
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								    ten = tcg_const_i64(10);                                            \
 								    z = tcg_const_i64(0);                                               \
 								                                                                        \
 								    if (add_cin) {                                                      \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rA(ctx->opcode), false);                         \
 								        tcg_gen_mulu2_i64(t0, t1, avr, ten);                            \
 								        get_avr64(avr, rB(ctx->opcode), false);                         \
 								        tcg_gen_andi_i64(t2, avr, 0xF);                                 \
 								        tcg_gen_add2_i64(avr, t2, t0, t1, t2, z);                       \
 								        set_avr64(rD(ctx->opcode), avr, false);                         \
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								    } else {                                                            \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rA(ctx->opcode), false);                         \
 								        tcg_gen_mulu2_i64(avr, t2, avr, ten);                           \
 								        set_avr64(rD(ctx->opcode), avr, false);                         \
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								    }                                                                   \
 								                                                                        \
 								    if (ret_carry) {                                                    \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rA(ctx->opcode), true);                          \
 								        tcg_gen_mulu2_i64(t0, t1, avr, ten);                            \
 								        tcg_gen_add2_i64(t0, avr, t0, t1, t2, z);                       \
 								        set_avr64(rD(ctx->opcode), avr, false);                         \
 								        set_avr64(rD(ctx->opcode), z, true);                            \
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								    } else {                                                            \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								        get_avr64(avr, rA(ctx->opcode), true);                          \
 								        tcg_gen_mul_i64(t0, avr, ten);                                  \
 								        tcg_gen_add_i64(avr, t0, t2);                                   \
 								        set_avr64(rD(ctx->opcode), avr, true);                          \
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								    }                                                                   \
 								                                                                        \
 								    tcg_temp_free_i64(t0);                                              \
 								    tcg_temp_free_i64(t1);                                              \
 								    tcg_temp_free_i64(t2);                                              \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    tcg_temp_free_i64(avr);                                             \
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								    tcg_temp_free_i64(ten);                                             \
 								    tcg_temp_free_i64(z);                                               \
 								}                                                                       \
 								
 								/*
 								 * Instantiate the multiply-by-10 quadword translators vmul10uq,
 								 * vmul10euq, vmul10cuq and vmul10ecuq.  In the mnemonics "e" marks the
 								 * Extended form and "c" the write-Carry form.
 								 * NOTE(review): the two numeric arguments presumably select those
 								 * variants (extended first, write-carry second) -- confirm against the
 								 * GEN_VX_VMUL10 parameter list.
 								 *
 								 * The separator line above is required: the closing brace of the
 								 * GEN_VX_VMUL10 definition ends with a line continuation, so without
 								 * it the first invocation below is absorbed into the macro body.
 								 */
 								GEN_VX_VMUL10(vmul10uq, 0, 0);
 								GEN_VX_VMUL10(vmul10euq, 1, 0);
 								GEN_VX_VMUL10(vmul10cuq, 0, 1);
 								GEN_VX_VMUL10(vmul10ecuq, 1, 1);
-												target/ppc: convert VMX logical instructions to use vector operations

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-2-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:42 +03:00
+								#define GEN_VXFORM_V(name, vece, tcg_op, opc2, opc3)                    \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								{                                                                       \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								                                                                        \
-												target/ppc: convert VMX logical instructions to use vector operations

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-2-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:42 +03:00
+								    tcg_op(vece,                                                        \
-												target/ppc: introduce avr_full_offset() function

All TCG vector operations require pointers to the base address of the vector
rather than separate access to the top and bottom 64-bits. Convert the VMX TCG
instructions to use a new avr_full_offset() function instead of avr64_offset()
which can then itself be written as a simple wrapper onto vsr_full_offset().

This same function can also reused in cpu_avr_ptr() to avoid having more than
one copy of the offset calculation logic.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-Id: <20190307180520.13868-5-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-03-07 21:05:17 +03:00
+								           avr_full_offset(rD(ctx->opcode)),                            \
 								           avr_full_offset(rA(ctx->opcode)),                            \
 								           avr_full_offset(rB(ctx->opcode)),                            \
-												target/ppc: convert VMX logical instructions to use vector operations

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-2-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:42 +03:00
+, 16);                                                     \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								}
-												target/ppc: convert VMX logical instructions to use vector operations

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-2-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:42 +03:00
+								/*
 								 * Logical operations: each line defines gen_<name>() through
 								 * GEN_VXFORM_V, using the matching tcg_gen_gvec_* expander with
 								 * MO_64 as the element size.
 								 */
 								GEN_VXFORM_V(vand, MO_64, tcg_gen_gvec_and, 2, 16);
 								GEN_VXFORM_V(vandc, MO_64, tcg_gen_gvec_andc, 2, 17);
 								GEN_VXFORM_V(vor, MO_64, tcg_gen_gvec_or, 2, 18);
 								GEN_VXFORM_V(vxor, MO_64, tcg_gen_gvec_xor, 2, 19);
 								GEN_VXFORM_V(vnor, MO_64, tcg_gen_gvec_nor, 2, 20);
 								GEN_VXFORM_V(veqv, MO_64, tcg_gen_gvec_eqv, 2, 26);
 								GEN_VXFORM_V(vnand, MO_64, tcg_gen_gvec_nand, 2, 22);
 								GEN_VXFORM_V(vorc, MO_64, tcg_gen_gvec_orc, 2, 21);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								/*
 								 * GEN_VXFORM - define gen_<name>() for a VX-form instruction that is
 								 * implemented by gen_helper_<name>().
 								 *
 								 * The generated function builds TCGv_ptr temporaries for vA, vB and vD
 								 * with gen_avr_ptr(), calls gen_helper_<name>(vD, vA, vB) and then
 								 * frees the three temporaries.  When Altivec is disabled it raises
 								 * POWERPC_EXCP_VPU and returns before allocating anything.
 								 * opc2 and opc3 are not used by the expansion.
 								 */
 								#define GEN_VXFORM(name, opc2, opc3)                                    \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								static void glue(gen_, name)(DisasContext *ctx)                         \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								{                                                                       \
 								    TCGv_ptr ra, rb, rd;                                                \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    ra = gen_avr_ptr(rA(ctx->opcode));                                  \
 								    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
 								    rd = gen_avr_ptr(rD(ctx->opcode));                                  \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								    gen_helper_##name(rd, ra, rb);                                      \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    tcg_temp_free_ptr(ra);                                              \
 								    tcg_temp_free_ptr(rb);                                              \
 								    tcg_temp_free_ptr(rd);                                              \
 								}
-												target/ppc: Optimize emulation of lvsl and lvsr instructions

Add a simple macro that calls the TCG implementation of the appropriate
instruction if Altivec support is active.

Optimization of altivec instruction lvsl (Load Vector for Shift Left).
Place bytes sh:sh+15 of value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F
in destination register. Sh is calculated by adding 2 source registers and
getting bits 60-63 of result.

First, the bits [28-31] are placed from EA to variable sh. After that,
the bytes are created in the following way:
sh:(sh+7) of X(from description) by multiplying sh with 0x0101010101010101
followed by addition of the result with 0x0001020304050607. Value obtained
is placed in higher doubleword element of vD.
(sh+8):(sh+15) by adding the result of previous multiplication with
0x08090a0b0c0d0e0f. Value obtained is placed in lower doubleword element
of vD.

Optimization of altivec instruction lvsr (Load Vector for Shift Right).
Place bytes 16-sh:31-sh of value 0x00 || 0x01 || 0x02 || ... || 0x1E ||
0x1F in destination register. Sh is calculated by adding 2 source
registers and getting bits 60-63 of result.

First, the bits [28-31] are placed from EA to variable sh. After that,
the bytes are created in the following way:
sh:(sh+7) of X(from description) by multiplying sh with 0x0101010101010101
followed by subtraction of the result from 0x1011121314151617. Value
obtained is placed in higher doubleword element of vD.
(sh+8):(sh+15) by subtracting the result of previous multiplication from
0x18191a1b1c1d1e1f. Value obtained is placed in lower doubleword element
of vD.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-2-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:47 +03:00
 								/*
 								 * GEN_VXFORM_TRANS - define gen_<name>() as a guarded call to the
 								 * hand-written translator trans_<name>(ctx).
 								 *
 								 * The only work done here is the Altivec-enabled check: when Altivec
 								 * is disabled, POWERPC_EXCP_VPU is raised and trans_<name>() is not
 								 * called.  opc2 and opc3 are not used by the expansion.
 								 */
+								#define GEN_VXFORM_TRANS(name, opc2, opc3)                              \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    trans_##name(ctx);                                                  \
 								}
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								/*
 								 * GEN_VXFORM_ENV - define gen_<name>() for a VX-form instruction whose
 								 * helper also receives cpu_env.
 								 *
 								 * Emits gen_helper_<name>(cpu_env, vD, vA, vB), where vD/vA/vB are
 								 * TCGv_ptr temporaries from gen_avr_ptr() that are freed after the
 								 * call.  Raises POWERPC_EXCP_VPU instead when Altivec is disabled.
 								 * opc2 and opc3 are not used by the expansion.
 								 */
+								#define GEN_VXFORM_ENV(name, opc2, opc3)                                \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    TCGv_ptr ra, rb, rd;                                                \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    ra = gen_avr_ptr(rA(ctx->opcode));                                  \
 								    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
 								    rd = gen_avr_ptr(rD(ctx->opcode));                                  \
 								    gen_helper_##name(cpu_env, rd, ra, rb);                             \
 								    tcg_temp_free_ptr(ra);                                              \
 								    tcg_temp_free_ptr(rb);                                              \
 								    tcg_temp_free_ptr(rd);                                              \
 								}
 								/*
 								 * GEN_VXFORM3 - define gen_<name>() for an instruction with three
 								 * vector source registers.
 								 *
 								 * Emits gen_helper_<name>(vD, vA, vB, vC), where all four arguments
 								 * are TCGv_ptr temporaries from gen_avr_ptr() that are freed after the
 								 * call.  Raises POWERPC_EXCP_VPU instead when Altivec is disabled.
 								 * opc2 and opc3 are not used by the expansion.
 								 */
 								#define GEN_VXFORM3(name, opc2, opc3)                                   \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    TCGv_ptr ra, rb, rc, rd;                                            \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    ra = gen_avr_ptr(rA(ctx->opcode));                                  \
 								    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
 								    rc = gen_avr_ptr(rC(ctx->opcode));                                  \
 								    rd = gen_avr_ptr(rD(ctx->opcode));                                  \
 								    gen_helper_##name(rd, ra, rb, rc);                                  \
 								    tcg_temp_free_ptr(ra);                                              \
 								    tcg_temp_free_ptr(rb);                                              \
 								    tcg_temp_free_ptr(rc);                                              \
 								    tcg_temp_free_ptr(rd);                                              \
 								}
 								/*
 								 * Support for Altivec instruction pairs that use bit 31 (Rc) as
 								 * an opcode bit.  In general, these pairs come from different
 								 * versions of the ISA, so we must also support a pair of flags for
 								 * each instruction.
 								 *
 								 * Defines gen_<name0>_<name1>(), which dispatches to gen_<name0>()
 								 * when Rc is 0 and to gen_<name1>() when Rc is 1, provided the
 								 * selected instruction is enabled by flgN (tested against
 								 * ctx->insns_flags) or by flg2_N (tested against ctx->insns_flags2).
 								 * Every other case raises POWERPC_EXCP_INVAL_INVAL.
 								 *
 								 * The flag arguments are parenthesized in the expansion so that a
 								 * compound mask such as A | B is tested as a whole.
 								 */
 								#define GEN_VXFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)          \
 								static void glue(gen_, name0##_##name1)(DisasContext *ctx)             \
 								{                                                                      \
 								    if ((Rc(ctx->opcode) == 0) &&                                      \
 								        ((ctx->insns_flags & (flg0)) ||                                \
 								         (ctx->insns_flags2 & (flg2_0)))) {                            \
 								        gen_##name0(ctx);                                              \
 								    } else if ((Rc(ctx->opcode) == 1) &&                               \
 								        ((ctx->insns_flags & (flg1)) ||                                \
 								         (ctx->insns_flags2 & (flg2_1)))) {                            \
 								        gen_##name1(ctx);                                              \
 								    } else {                                                           \
 								        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);            \
 								    }                                                                  \
 								}
-												target/ppc: Refactor emulation of vmrgew and vmrgow instructions

Since I found these two instructions already implemented with TCG, I
refactored them so they are consistent with the other similar
implementations that I introduced in this patch.

Also, a new dual macro GEN_VXFORM_TRANS_DUAL is added. This macro is
used if one instruction is realized with direct translation, and second
one with a helper.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1566898663-25858-4-git-send-email-stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-08-27 12:37:43 +03:00
+								/*
 								 * We use this macro if one instruction is realized with direct
 								 * translation, and second one with helper.
 								 *
 								 * Defines gen_<name0>_<name1>().  With Rc == 0 and name0 enabled by
 								 * flg0/flg2_0, the Altivec-enabled check is done here (raising
 								 * POWERPC_EXCP_VPU on failure) and trans_<name0>() is called.  With
 								 * Rc == 1 and name1 enabled by flg1/flg2_1, gen_<name1>() is called.
 								 * Every other case raises POWERPC_EXCP_INVAL_INVAL.
 								 *
 								 * The flag arguments are parenthesized in the expansion so that a
 								 * compound mask such as A | B is tested as a whole.
 								 */
 								#define GEN_VXFORM_TRANS_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)\
 								static void glue(gen_, name0##_##name1)(DisasContext *ctx)             \
 								{                                                                      \
 								    if ((Rc(ctx->opcode) == 0) &&                                      \
 								        ((ctx->insns_flags & (flg0)) ||                                \
 								         (ctx->insns_flags2 & (flg2_0)))) {                            \
 								        if (unlikely(!ctx->altivec_enabled)) {                         \
 								            gen_exception(ctx, POWERPC_EXCP_VPU);                      \
 								            return;                                                    \
 								        }                                                              \
 								        trans_##name0(ctx);                                            \
 								    } else if ((Rc(ctx->opcode) == 1) &&                               \
 								        ((ctx->insns_flags & (flg1)) ||                                \
 								         (ctx->insns_flags2 & (flg2_1)))) {                            \
 								        gen_##name1(ctx);                                              \
 								    } else {                                                           \
 								        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);            \
 								    }                                                                  \
 								}
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								/*
 								 * Adds support to provide invalid mask.
 								 *
 								 * Defines gen_<name0>_<name1>(), selecting gen_<name0>() for Rc == 0
 								 * and gen_<name1>() for Rc == 1.  The selected instruction must be
 								 * enabled by flgN (ctx->insns_flags) or flg2_N (ctx->insns_flags2)
 								 * and must have no opcode bit set that is also set in invalN;
 								 * otherwise POWERPC_EXCP_INVAL_INVAL is raised.
 								 *
 								 * The flag and mask arguments are parenthesized in the expansion so
 								 * that a compound mask such as A | B is tested as a whole.
 								 */
 								#define GEN_VXFORM_DUAL_EXT(name0, flg0, flg2_0, inval0,                \
 								                            name1, flg1, flg2_1, inval1)                \
 								static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
 								{                                                                       \
 								    if ((Rc(ctx->opcode) == 0) &&                                       \
 								        ((ctx->insns_flags & (flg0)) ||                                 \
 								         (ctx->insns_flags2 & (flg2_0))) &&                             \
 								        !(ctx->opcode & (inval0))) {                                    \
 								        gen_##name0(ctx);                                               \
 								    } else if ((Rc(ctx->opcode) == 1) &&                                \
 								               ((ctx->insns_flags & (flg1)) ||                          \
 								                (ctx->insns_flags2 & (flg2_1))) &&                      \
 								               !(ctx->opcode & (inval1))) {                             \
 								        gen_##name1(ctx);                                               \
 								    } else {                                                            \
 								        gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);             \
 								    }                                                                   \
 								}
-												target-ppc: add vextu[bhw][lr]x instructions

vextublx: Vector Extract Unsigned Byte Left-Indexed VX-form
vextuhlx: Vector Extract Unsigned Halfword Left-Indexed VX-form
vextuwlx: Vector Extract Unsigned Word Left-Indexed VX-form
vextubrx: Vector Extract Unsigned Byte Right-Indexed VX-form
vextuhrx: Vector Extract Unsigned Halfword Right-Indexed VX-form
vextuwrx: Vector Extract Unsigned Word Right-Indexed VX-form

Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Hariharan T.S. <hari@linux.vnet.ibm.com>
[ implement using int128_rshift ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-28 10:56:42 +03:00
 								/*
 								 * GEN_VXFORM_HETRO - define gen_<name>() for an instruction that mixes
 								 * general-purpose and vector register operands.
 								 *
 								 * Emits gen_helper_<name>(cpu_gpr[rD], cpu_gpr[rA], vB), where vB is a
 								 * TCGv_ptr temporary from gen_avr_ptr() that is freed after the call.
 								 * NOTE(review): cpu_gpr[rD] is presumably the helper's result -- confirm
 								 * against the helper declaration.
 								 * Raises POWERPC_EXCP_VPU instead when Altivec is disabled.
 								 * opc2 and opc3 are not used by the expansion.
 								 */
+								#define GEN_VXFORM_HETRO(name, opc2, opc3)                              \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    TCGv_ptr rb;                                                        \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
 								    gen_helper_##name(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)], rb); \
 								    tcg_temp_free_ptr(rb);                                              \
 								}
-												target/ppc: convert vaddu[b,h,w,d] and vsubu[b,h,w,d] over to use vector operations

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-3-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:43 +03:00
 								/* vaddubm: gvec add with MO_8 elements. */
+								GEN_VXFORM_V(vaddubm, MO_8, tcg_gen_gvec_add, 0, 0);
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
 								/*
 								 * vaddubm (Rc = 0, PPC_ALTIVEC) and vmul10cuq (Rc = 1, PPC2_ISA300)
 								 * are told apart by the Rc bit.  vmul10cuq is additionally rejected
 								 * as invalid if any opcode bit in 0x0000F800 is set; vaddubm has no
 								 * such mask.
 								 */
+								GEN_VXFORM_DUAL_EXT(vaddubm, PPC_ALTIVEC, PPC_NONE, 0,       \
 								                    vmul10cuq, PPC_NONE, PPC2_ISA300, 0x0000F800)
-												target/ppc: convert vaddu[b,h,w,d] and vsubu[b,h,w,d] over to use vector operations

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-3-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:43 +03:00
 								/* vadduhm: gvec add with MO_16 elements. */
+								GEN_VXFORM_V(vadduhm, MO_16, tcg_gen_gvec_add, 0, 1);
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
 								/*
 								 * vadduhm (Rc = 0, PPC_ALTIVEC) and vmul10ecuq (Rc = 1, PPC2_ISA300)
 								 * are told apart by the Rc bit.
 								 */
+								GEN_VXFORM_DUAL(vadduhm, PPC_ALTIVEC, PPC_NONE,  \
 								                vmul10ecuq, PPC_NONE, PPC2_ISA300)
-												target/ppc: convert vaddu[b,h,w,d] and vsubu[b,h,w,d] over to use vector operations

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-3-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:43 +03:00
 								/*
 								 * Element-wise add (MO_32, MO_64) and subtract (MO_8 through MO_64),
 								 * each expanded with tcg_gen_gvec_add / tcg_gen_gvec_sub at the
 								 * element size given by the second argument.
 								 */
+								GEN_VXFORM_V(vadduwm, MO_32, tcg_gen_gvec_add, 0, 2);
 								GEN_VXFORM_V(vaddudm, MO_64, tcg_gen_gvec_add, 0, 3);
 								GEN_VXFORM_V(vsububm, MO_8, tcg_gen_gvec_sub, 0, 16);
 								GEN_VXFORM_V(vsubuhm, MO_16, tcg_gen_gvec_sub, 0, 17);
 								GEN_VXFORM_V(vsubuwm, MO_32, tcg_gen_gvec_sub, 0, 18);
 								GEN_VXFORM_V(vsubudm, MO_64, tcg_gen_gvec_sub, 0, 19);
-												target/ppc: convert vmin* and vmax* to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-18-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:58 +03:00
 								/*
 								 * Element-wise maximum and minimum, expanded with the gvec
 								 * umax/smax/umin/smin expanders at element sizes MO_8, MO_16, MO_32
 								 * and MO_64.
 								 */
+								GEN_VXFORM_V(vmaxub, MO_8, tcg_gen_gvec_umax, 1, 0);
 								GEN_VXFORM_V(vmaxuh, MO_16, tcg_gen_gvec_umax, 1, 1);
 								GEN_VXFORM_V(vmaxuw, MO_32, tcg_gen_gvec_umax, 1, 2);
 								GEN_VXFORM_V(vmaxud, MO_64, tcg_gen_gvec_umax, 1, 3);
 								GEN_VXFORM_V(vmaxsb, MO_8, tcg_gen_gvec_smax, 1, 4);
 								GEN_VXFORM_V(vmaxsh, MO_16, tcg_gen_gvec_smax, 1, 5);
 								GEN_VXFORM_V(vmaxsw, MO_32, tcg_gen_gvec_smax, 1, 6);
 								GEN_VXFORM_V(vmaxsd, MO_64, tcg_gen_gvec_smax, 1, 7);
 								GEN_VXFORM_V(vminub, MO_8, tcg_gen_gvec_umin, 1, 8);
 								GEN_VXFORM_V(vminuh, MO_16, tcg_gen_gvec_umin, 1, 9);
 								GEN_VXFORM_V(vminuw, MO_32, tcg_gen_gvec_umin, 1, 10);
 								GEN_VXFORM_V(vminud, MO_64, tcg_gen_gvec_umin, 1, 11);
 								GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12);
 								GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13);
 								GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14);
 								GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								/* vavgub: translated through gen_helper_vavgub(vD, vA, vB). */
+								GEN_VXFORM(vavgub, 1, 16);
-												target-ppc: add vabsdu[b,h,w] instructions

Adds following instructions:

vabsdub: Vector Absolute Difference Unsigned Byte
vabsduh: Vector Absolute Difference Unsigned Halfword
vabsduw: Vector Absolute Difference Unsigned Word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ use ISA300 define. Drop etype ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-28 21:14:14 +03:00
 								/*
 								 * vabsdub is translated through its helper and paired with vavgub:
 								 * Rc = 0 selects vavgub (PPC_ALTIVEC), Rc = 1 selects vabsdub
 								 * (PPC2_ISA300).
 								 */
+								GEN_VXFORM(vabsdub, 1, 16);
 								GEN_VXFORM_DUAL(vavgub, PPC_ALTIVEC, PPC_NONE, \
 								                vabsdub, PPC_NONE, PPC2_ISA300)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_VXFORM(vavguh, 1, 17);
-												target-ppc: add vabsdu[b,h,w] instructions

Adds following instructions:

vabsdub: Vector Absolute Difference Unsigned Byte
vabsduh: Vector Absolute Difference Unsigned Halfword
vabsduw: Vector Absolute Difference Unsigned Word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ use ISA300 define. Drop etype ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-28 21:14:14 +03:00
+								GEN_VXFORM(vabsduh, 1, 17);
 								GEN_VXFORM_DUAL(vavguh, PPC_ALTIVEC, PPC_NONE, \
 								                vabsduh, PPC_NONE, PPC2_ISA300)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_VXFORM(vavguw, 1, 18);
-												target-ppc: add vabsdu[b,h,w] instructions

Adds following instructions:

vabsdub: Vector Absolute Difference Unsigned Byte
vabsduh: Vector Absolute Difference Unsigned Halfword
vabsduw: Vector Absolute Difference Unsigned Word

Signed-off-by: Sandipan Das <sandipandas1990@gmail.com>
[ use ISA300 define. Drop etype ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-28 21:14:14 +03:00
+								GEN_VXFORM(vabsduw, 1, 18);
 								GEN_VXFORM_DUAL(vavguw, PPC_ALTIVEC, PPC_NONE, \
 								                vabsduw, PPC_NONE, PPC2_ISA300)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_VXFORM(vavgsb, 1, 20);
 								/* Vector Average Signed, halfword and word element sizes. */
 								GEN_VXFORM(vavgsh, 1, 21);
 								GEN_VXFORM(vavgsw, 1, 22);
 								/*
 								 * Vector Merge High / Merge Low for byte, halfword and word elements.
 								 * NOTE(review): the two numbers look like the opc2/opc3 opcode fields --
 								 * confirm against the GEN_VXFORM definition.
 								 */
 								GEN_VXFORM(vmrghb, 6, 0);
 								GEN_VXFORM(vmrghh, 6, 1);
 								GEN_VXFORM(vmrghw, 6, 2);
 								GEN_VXFORM(vmrglb, 6, 4);
 								GEN_VXFORM(vmrglh, 6, 5);
 								GEN_VXFORM(vmrglw, 6, 6);
-												target/ppc: Refactor emulation of vmrgew and vmrgow instructions

Since I found these two instructions implemented with tcg, I refactored
them so they are consistent with other similar implementations that
I introduced in this patch.

Also, a new dual macro GEN_VXFORM_TRANS_DUAL is added. This macro is
used if one instruction is realized with direct translation, and second
one with a helper.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1566898663-25858-4-git-send-email-stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-08-27 12:37:43 +03:00
+								static void trans_vmrgew(DisasContext *ctx)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								{
-												target/ppc: Refactor emulation of vmrgew and vmrgow instructions

Since I found these two instructions implemented with tcg, I refactored
them so they are consistent with other similar implementations that
I introduced in this patch.

Also, a new dual macro GEN_VXFORM_TRANS_DUAL is added. This macro is
used if one instruction is realized with direct translation, and second
one with a helper.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1566898663-25858-4-git-send-email-stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-08-27 12:37:43 +03:00
+								    int VT = rD(ctx->opcode);
 								    int VA = rA(ctx->opcode);
 								    int VB = rB(ctx->opcode);
 								    TCGv_i64 tmp = tcg_temp_new_i64();
 								    TCGv_i64 avr = tcg_temp_new_i64();
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
 								    /*
 								     * High doubleword of VT (the 'true' argument selects the high
 								     * doubleword, as in the lvsl/lvsr code in this file): the upper 32 bits
 								     * of VB's high doubleword are shifted down into tmp and then deposited
 								     * over the low 32 bits of VA's high doubleword. The result keeps VA's
 								     * upper word on top and carries VB's upper word underneath.
 								     */
 								    get_avr64(avr, VB, true);
 								    tcg_gen_shri_i64(tmp, avr, 32);
 								    get_avr64(avr, VA, true);
 								    tcg_gen_deposit_i64(avr, avr, tmp, 0, 32);
 								    set_avr64(VT, avr, true);
 								    /* Same combination applied to the low doublewords of VA and VB. */
 								    get_avr64(avr, VB, false);
 								    tcg_gen_shri_i64(tmp, avr, 32);
 								    get_avr64(avr, VA, false);
 								    tcg_gen_deposit_i64(avr, avr, tmp, 0, 32);
 								    set_avr64(VT, avr, false);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    tcg_temp_free_i64(tmp);
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
+								    tcg_temp_free_i64(avr);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								}
-												target/ppc: Refactor emulation of vmrgew and vmrgow instructions

Since I found these two instructions implemented with tcg, I refactored
them so they are consistent with other similar implementations that
I introduced in this patch.

Also, a new dual macro GEN_VXFORM_TRANS_DUAL is added. This macro is
used if one instruction is realized with direct translation, and second
one with a helper.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1566898663-25858-4-git-send-email-stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-08-27 12:37:43 +03:00
+								static void trans_vmrgow(DisasContext *ctx)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								{
-												target/ppc: Refactor emulation of vmrgew and vmrgow instructions

Since I found these two instructions implemented with tcg, I refactored
them so they are consistent with other similar implementations that
I introduced in this patch.

Also, a new dual macro GEN_VXFORM_TRANS_DUAL is added. This macro is
used if one instruction is realized with direct translation, and second
one with a helper.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1566898663-25858-4-git-send-email-stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-08-27 12:37:43 +03:00
+								    int VT = rD(ctx->opcode);
 								    int VA = rA(ctx->opcode);
 								    int VB = rB(ctx->opcode);
 								    TCGv_i64 t0 = tcg_temp_new_i64();
 								    TCGv_i64 t1 = tcg_temp_new_i64();
 								    TCGv_i64 avr = tcg_temp_new_i64();
-												target/ppc: introduce get_avr64() and set_avr64() helpers for VMX register access

These helpers allow us to move AVR register values to/from the specified TCGv_i64
argument.

To prevent VMX helpers accessing the cpu_avr{l,h} arrays directly, add extra TCG
temporaries as required.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-01-02 12:14:18 +03:00
 								    /*
 								     * High doubleword of VT (the 'true' argument selects the high
 								     * doubleword, as in the lvsl/lvsr code in this file): start from VB's
 								     * high doubleword (t0) and overwrite its bits 32..63 with the low
 								     * 32 bits of VA's high doubleword (t1). VA's lower word therefore ends
 								     * up above VB's lower word.
 								     */
 								    get_avr64(t0, VB, true);
 								    get_avr64(t1, VA, true);
 								    tcg_gen_deposit_i64(avr, t0, t1, 32, 32);
 								    set_avr64(VT, avr, true);
 								    /* Same combination applied to the low doublewords of VA and VB. */
 								    get_avr64(t0, VB, false);
 								    get_avr64(t1, VA, false);
 								    tcg_gen_deposit_i64(avr, t0, t1, 32, 32);
 								    set_avr64(VT, avr, false);
 								    /* Release the three temporaries allocated at the top of the function. */
 								    tcg_temp_free_i64(t0);
 								    tcg_temp_free_i64(t1);
 								    tcg_temp_free_i64(avr);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								}
-												target/ppc: Optimize emulation of lvsl and lvsr instructions

Adding simple macro that is calling tcg implementation of appropriate
instruction if altivec support is active.

Optimization of altivec instruction lvsl (Load Vector for Shift Left).
Place bytes sh:sh+15 of value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F
in destination register. Sh is calculated by adding 2 source registers and
getting bits 60-63 of result.

First, the bits [28-31] are placed from EA to variable sh. After that,
the bytes are created in the following way:
sh:(sh+7) of X(from description) by multiplying sh with 0x0101010101010101
followed by addition of the result with 0x0001020304050607. Value obtained
is placed in higher doubleword element of vD.
(sh+8):(sh+15) by adding the result of previous multiplication with
0x08090a0b0c0d0e0f. Value obtained is placed in lower doubleword element
of vD.

Optimization of altivec instruction lvsr (Load Vector for Shift Right).
Place bytes 16-sh:31-sh of value 0x00 || 0x01 || 0x02 || ... || 0x1E ||
0x1F in destination register. Sh is calculated by adding 2 source
registers and getting bits 60-63 of result.

First, the bits [28-31] are placed from EA to variable sh. After that,
the bytes are created in the following way:
sh:(sh+7) of X(from description) by multiplying sh with 0x0101010101010101
followed by subtraction of the result from 0x1011121314151617. Value
obtained is placed in higher doubleword element of vD.
(sh+8):(sh+15) by subtracting the result of previous multiplication from
0x18191a1b1c1d1e1f. Value obtained is placed in lower doubleword element
of vD.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-2-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:47 +03:00
+								/*
 								 * lvsl VRT,RA,RB - Load Vector for Shift Left
 								 *
 								 * Let the EA be the sum (rA|0)+(rB). Let sh be the low four bits of EA
 								 * (EA[28-31] in 32-bit numbering, EA[60-63] in 64-bit numbering).
 								 * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
 								 * Bytes sh:sh+15 of X are placed into vD.
 								 */
 								static void trans_lvsl(DisasContext *ctx)
 								{
 								    const int vrt = rD(ctx->opcode);
 								    TCGv_i64 dword = tcg_temp_new_i64();
 								    TCGv_i64 splat = tcg_temp_new_i64();
 								    TCGv ea = tcg_temp_new();
 								    /* sh is the low nibble of the effective address. */
 								    gen_addr_reg_index(ctx, ea);
 								    tcg_gen_extu_tl_i64(splat, ea);
 								    tcg_gen_andi_i64(splat, splat, 0xfULL);
 								    /*
 								     * Replicate sh into all eight byte lanes. Adding the lane indices
 								     * below never carries between bytes, since 0x0f + 0x0f still fits
 								     * in one byte.
 								     */
 								    tcg_gen_muli_i64(splat, splat, 0x0101010101010101ULL);
 								    /* Bytes sh:sh+7 of X go to the higher doubleword of vD. */
 								    tcg_gen_addi_i64(dword, splat, 0x0001020304050607ULL);
 								    set_avr64(vrt, dword, true);
 								    /* Bytes sh+8:sh+15 of X go to the lower doubleword of vD. */
 								    tcg_gen_addi_i64(dword, splat, 0x08090a0b0c0d0e0fULL);
 								    set_avr64(vrt, dword, false);
 								    tcg_temp_free_i64(dword);
 								    tcg_temp_free_i64(splat);
 								    tcg_temp_free(ea);
 								}
 								/*
 								 * lvsr VRT,RA,RB - Load Vector for Shift Right
 								 *
 								 * Let the EA be the sum (rA|0)+(rB). Let sh be the low four bits of EA
 								 * (EA[28-31] in 32-bit numbering, EA[60-63] in 64-bit numbering).
 								 * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
 								 * Bytes (16-sh):(31-sh) of X are placed into vD.
 								 */
 								static void trans_lvsr(DisasContext *ctx)
 								{
 								    const int vrt = rD(ctx->opcode);
 								    TCGv_i64 dword = tcg_temp_new_i64();
 								    TCGv_i64 splat = tcg_temp_new_i64();
 								    TCGv ea = tcg_temp_new();
 								    /* sh is the low nibble of the effective address. */
 								    gen_addr_reg_index(ctx, ea);
 								    tcg_gen_extu_tl_i64(splat, ea);
 								    tcg_gen_andi_i64(splat, splat, 0xfULL);
 								    /*
 								     * Replicate sh into all eight byte lanes. Subtracting it from the
 								     * lane constants below never borrows between bytes, since every
 								     * constant byte is at least 0x10 and sh is at most 0x0f.
 								     */
 								    tcg_gen_muli_i64(splat, splat, 0x0101010101010101ULL);
 								    /* Bytes (16-sh):(23-sh) of X go to the higher doubleword of vD. */
 								    tcg_gen_subfi_i64(dword, 0x1011121314151617ULL, splat);
 								    set_avr64(vrt, dword, true);
 								    /* Bytes (24-sh):(31-sh) of X go to the lower doubleword of vD. */
 								    tcg_gen_subfi_i64(dword, 0x18191a1b1c1d1e1fULL, splat);
 								    set_avr64(vrt, dword, false);
 								    tcg_temp_free_i64(dword);
 								    tcg_temp_free_i64(splat);
 								    tcg_temp_free(ea);
 								}
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								/*
 								 * vsl VRT,VRA,VRB - Vector Shift Left
 								 *
 								 * Shifting left 128 bit value of vA by value specified in bits 125-127 of vB.
 								 * Lowest 3 bits in each byte element of register vB must be identical or
 								 * result is undefined.
 								 */
 								static void trans_vsl(DisasContext *ctx)
 								{
 								    int VT = rD(ctx->opcode);
 								    int VA = rA(ctx->opcode);
 								    int VB = rB(ctx->opcode);
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    TCGv_i64 avr = tcg_temp_new_i64();
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    TCGv_i64 sh = tcg_temp_new_i64();
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    TCGv_i64 carry = tcg_temp_new_i64();
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    TCGv_i64 tmp = tcg_temp_new_i64();
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    /* Place bits 125-127 of vB in 'sh'. */
 								    get_avr64(avr, VB, false);
 								    tcg_gen_andi_i64(sh, avr, 0x07ULL);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
 								    /*
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								     * Save highest 'sh' bits of lower doubleword element of vA in variable
 								     * 'carry' and perform shift on lower doubleword.
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								     */
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    get_avr64(avr, VA, false);
 								    tcg_gen_subfi_i64(tmp, 32, sh);
 								    tcg_gen_shri_i64(carry, avr, 32);
 								    tcg_gen_shr_i64(carry, carry, tmp);
 								    tcg_gen_shl_i64(avr, avr, sh);
 								    set_avr64(VT, avr, false);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
 								    /*
 								     * Perform shift on higher doubleword element of vA and replace lowest
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								     * 'sh' bits with 'carry'.
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								     */
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    get_avr64(avr, VA, true);
 								    tcg_gen_shl_i64(avr, avr, sh);
 								    tcg_gen_or_i64(avr, avr, carry);
 								    set_avr64(VT, avr, true);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    tcg_temp_free_i64(avr);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    tcg_temp_free_i64(sh);
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    tcg_temp_free_i64(carry);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    tcg_temp_free_i64(tmp);
 								}
 								/*
 								 * vsr VRT,VRA,VRB - Vector Shift Right
 								 *
 								 * Shifting right 128 bit value of vA by value specified in bits 125-127 of vB.
 								 * Lowest 3 bits in each byte element of register vB must be identical or
 								 * result is undefined.
 								 */
 								static void trans_vsr(DisasContext *ctx)
 								{
 								    int VT = rD(ctx->opcode);
 								    int VA = rA(ctx->opcode);
 								    int VB = rB(ctx->opcode);
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    TCGv_i64 avr = tcg_temp_new_i64();
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    TCGv_i64 sh = tcg_temp_new_i64();
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    TCGv_i64 carry = tcg_temp_new_i64();
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    TCGv_i64 tmp = tcg_temp_new_i64();
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    /* Place bits 125-127 of vB in 'sh'. */
 								    get_avr64(avr, VB, false);
 								    tcg_gen_andi_i64(sh, avr, 0x07ULL);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
 								    /*
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								     * Save lowest 'sh' bits of higher doubleword element of vA in variable
 								     * 'carry' and perform shift on higher doubleword.
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								     */
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    get_avr64(avr, VA, true);
 								    tcg_gen_subfi_i64(tmp, 32, sh);
 								    tcg_gen_shli_i64(carry, avr, 32);
 								    tcg_gen_shl_i64(carry, carry, tmp);
 								    tcg_gen_shr_i64(avr, avr, sh);
 								    set_avr64(VT, avr, true);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    /*
 								     * Perform shift on lower doubleword element of vA and replace highest
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								     * 'sh' bits with 'carry'.
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr(Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								     */
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    get_avr64(avr, VA, false);
 								    tcg_gen_shr_i64(avr, avr, sh);
 								    tcg_gen_or_i64(avr, avr, carry);
 								    set_avr64(VT, avr, false);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    tcg_temp_free_i64(avr);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    tcg_temp_free_i64(sh);
-												target/ppc: Fix for optimized vsl/vsr instructions

In previous implementation, invocation of TCG shift function could request
shift of TCG variable by 64 bits when variable 'sh' is 0, which is not
supported in TCG (values can be shifted by 0 to 63 bits). This patch fixes
this by using two separate invocation of TCG shift functions, with maximum
shift amount of 32.

Name of variable 'shifted' is changed to 'carry' so variable naming
is similar to old helper implementation.

Variables 'avrA' and 'avrB' are replaced with variable 'avr'.

Fixes: 4e6d0920e7547e6af4bbac5ffe9adfe6ea621822
Reported-by: "Paul A. Clark" <pc@us.ibm.com>
Reported-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Suggested-by: Aleksandar Markovic <aleksandar.markovic@rt-rk.com>
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1570196639-7025-2-git-send-email-stefan.brankovic@rt-rk.com>
Tested-by: Paul A. Clarke  <pc@us.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-10-04 16:43:59 +03:00
+								    tcg_temp_free_i64(carry);
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								    tcg_temp_free_i64(tmp);
 								}
-												target/ppc: Optimize emulation of vgbbd instruction

Optimize altivec instruction vgbbd (Vector Gather Bits by Bytes by Doubleword)
All ith bits (i in range 1 to 8) of each byte of doubleword element in
source register are concatenated and placed into ith byte of appropriate
doubleword element in destination register.

Following solution is done for both doubleword elements of source register
in parallel, in order to reduce the number of instructions needed(that's why
arrays are used):
First, both doubleword elements of source register vB are placed in
appropriate element of array avr. Bits are gathered in 2x8 iterations(2 for
loops). In first iteration bit 1 of byte 1, bit 2 of byte 2,... bit 8 of
byte 8 are in their final spots so avr[i], i={0,1} can be and-ed with
tcg_mask. For every following iteration, both avr[i] and tcg_mask variables
have to be shifted right for 7 and 8 places, respectively, in order to get
bit 1 of byte 2, bit 2 of byte 3.. bit 7 of byte 8 in their final spots so
shifted avr values(saved in tmp) can be and-ed with new value of tcg_mask...
After first 8 iteration(first loop), all the first bits are in their final
places, all second bits but second bit from eight byte are in their places...
only 1 eight bit from eight byte is in its place). In second loop we do all
operations symmetrically, in order to get other half of bits in their final
spots. Results for first and second doubleword elements are saved in
result[0] and result[1] respectively. In the end those results are saved in
appropriate doubleword element of destination register vD.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-5-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:50 +03:00
+								/*
 								 * vgbbd VRT,VRB - Vector Gather Bits by Bytes by Doubleword
 								 *
 								 * All ith bits (i in range 1 to 8) of each byte of a doubleword element in
 								 * the source register are concatenated and placed into the ith byte of the
 								 * corresponding doubleword element in the destination register. Put
 								 * differently, each doubleword is handled as an 8x8 bit matrix (one row per
 								 * byte) and transposed.
 								 *
 								 * Both doubleword elements of the source register are processed in parallel
 								 * (that is why two-element arrays are used), in order to reduce the number
 								 * of generated operations:
 								 *  - avr[j] receives doubleword j of vB and result[j] accumulates the output.
 								 *  - mask (0x8040201008040201) has bit k set in byte k, i.e. it selects the
 								 *    diagonal of the bit matrix. Those bits are already in their final
 								 *    spots, so the first step is a plain AND with the mask.
 								 *  - First loop: for i = 1..7 the source is shifted right by 7 * i and the
 								 *    mask by 8 * i; the masked bits are the ones that land in their final
 								 *    spots for that shift and are OR-ed into the result.
 								 *  - Second loop: the same operations done symmetrically with left shifts,
 								 *    which places the other half of the bits.
 								 *  - Finally result[0] and result[1] are written to the matching
 								 *    doubleword elements of the destination register vD.
 								 */
 								static void trans_vgbbd(DisasContext *ctx)
 								{
 								    int VT = rD(ctx->opcode);
 								    int VB = rB(ctx->opcode);
 								    TCGv_i64 tmp = tcg_temp_new_i64();
 								    /* Bit k of byte k: the diagonal of the 8x8 bit matrix. */
 								    uint64_t mask = 0x8040201008040201ULL;
 								    int i, j;
 								    TCGv_i64 result[2];
 								    result[0] = tcg_temp_new_i64();
 								    result[1] = tcg_temp_new_i64();
 								    TCGv_i64 avr[2];
 								    avr[0] = tcg_temp_new_i64();
 								    avr[1] = tcg_temp_new_i64();
 								    TCGv_i64 tcg_mask = tcg_temp_new_i64();
 								    tcg_gen_movi_i64(tcg_mask, mask);
 								    /* Load both source doublewords and keep the diagonal bits as they are. */
 								    for (j = 0; j < 2; j++) {
 								        get_avr64(avr[j], VB, j);
 								        tcg_gen_and_i64(result[j], avr[j], tcg_mask);
 								    }
 								    /* Bits that have to move towards the low end: right shifts by 7 * i. */
 								    for (i = 1; i < 8; i++) {
 								        tcg_gen_movi_i64(tcg_mask, mask >> (i * 8));
 								        for (j = 0; j < 2; j++) {
 								            tcg_gen_shri_i64(tmp, avr[j], i * 7);
 								            tcg_gen_and_i64(tmp, tmp, tcg_mask);
 								            tcg_gen_or_i64(result[j], result[j], tmp);
 								        }
 								    }
 								    /* Bits that have to move towards the high end: left shifts by 7 * i. */
 								    for (i = 1; i < 8; i++) {
 								        tcg_gen_movi_i64(tcg_mask, mask << (i * 8));
 								        for (j = 0; j < 2; j++) {
 								            tcg_gen_shli_i64(tmp, avr[j], i * 7);
 								            tcg_gen_and_i64(tmp, tmp, tcg_mask);
 								            tcg_gen_or_i64(result[j], result[j], tmp);
 								        }
 								    }
 								    /* Write each gathered doubleword back to the same element index of VT. */
 								    for (j = 0; j < 2; j++) {
 								        set_avr64(VT, result[j], j);
 								    }
 								    tcg_temp_free_i64(tmp);
 								    tcg_temp_free_i64(tcg_mask);
 								    tcg_temp_free_i64(result[0]);
 								    tcg_temp_free_i64(result[1]);
 								    tcg_temp_free_i64(avr[0]);
 								    tcg_temp_free_i64(avr[1]);
 								}
-												target/ppc: Optimize emulation of vclzw instruction

Optimize Altivec instruction vclzw (Vector Count Leading Zeros Word).
This instruction counts the number of leading zeros of each word element
in source register and places result in the appropriate word element of
destination register.

Counting is to be performed in four iterations of for loop(one for each
word element of source register vB). Every iteration consists of loading
appropriate word element from source register, counting leading zeros
with tcg_gen_clzi_i32, and saving the result in appropriate word element
of destination register.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-7-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:52 +03:00
+								/*
 								 * vclzw VRT,VRB - Vector Count Leading Zeros Word
 								 *
 								 * For each of the four 32-bit word elements of VRB, count the leading zero
 								 * bits and store the count in the word element of VRT at the same offset.
 								 */
 								static void trans_vclzw(DisasContext *ctx)
 								{
 								    const int dst_reg = rD(ctx->opcode);
 								    const int src_reg = rB(ctx->opcode);
 								    TCGv_i32 word = tcg_temp_new_i32();
 								    /* Byte offsets of the first word of the source/destination register. */
 								    const size_t src_base = offsetof(CPUPPCState, vsr[32 + src_reg].u64[0]);
 								    const size_t dst_base = offsetof(CPUPPCState, vsr[32 + dst_reg].u64[0]);
 								    int w;
 								    /* One load / clz / store triple per word; words are 4 bytes apart. */
 								    for (w = 0; w < 4; w++) {
 								        tcg_gen_ld_i32(word, cpu_env, src_base + w * 4);
 								        /* The immediate 32 is passed as the third clzi operand. */
 								        tcg_gen_clzi_i32(word, word, 32);
 								        tcg_gen_st_i32(word, cpu_env, dst_base + w * 4);
 								    }
 								    tcg_temp_free_i32(word);
 								}
-												target/ppc: Optimize emulation of vclzd instruction

Optimize Altivec instruction vclzd (Vector Count Leading Zeros Doubleword).
This instruction counts the number of leading zeros of each doubleword element
in source register and places result in the appropriate doubleword element of
destination register.

Using tcg-s count leading zeros instruction two times(once for each
doubleword element of source register vB) and placing result in
appropriate doubleword element of destination register vD.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-6-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:51 +03:00
+								/*
 								 * vclzd VRT,VRB - Vector Count Leading Zeros Doubleword
 								 *
 								 * Count the leading zero bits of each 64-bit doubleword element of VRB and
 								 * store the count in the matching doubleword element of VRT.
 								 */
 								static void trans_vclzd(DisasContext *ctx)
 								{
 								    const int dst_reg = rD(ctx->opcode);
 								    const int src_reg = rB(ctx->opcode);
 								    TCGv_i64 dword = tcg_temp_new_i64();
 								    int high;
 								    /* Process the high doubleword (high == 1) first, then the low one. */
 								    for (high = 1; high >= 0; high--) {
 								        get_avr64(dword, src_reg, high);
 								        tcg_gen_clzi_i64(dword, dword, 64);
 								        set_avr64(dst_reg, dword, high);
 								    }
 								    tcg_temp_free_i64(dword);
 								}
-												target/ppc: convert vmuluwm to tcg_gen_gvec_mul

Convert the original implementation of vmuluwm to the more generic
tcg_gen_gvec_mul.

Signed-off-by: Lijun Pan <ljp@linux.ibm.com>
Message-Id: <20200701234344.91843-5-ljp@linux.ibm.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2020-07-02 02:43:39 +03:00
+								/*
 								 * vmuluwm: multiply of MO_32 (word) elements, expanded through the generic
 								 * tcg_gen_gvec_mul. The trailing 4, 2 are presumably the opc2/opc3 opcode
 								 * fields -- TODO confirm against the GEN_VXFORM_V definition.
 								 */
 								GEN_VXFORM_V(vmuluwm, MO_32, tcg_gen_gvec_mul, 4, 2);
-												target-ppc: add vsrv instruction

Adds Vector Shift Right Variable instruction.

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
[ reverse the order of computation to avoid temporary array ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-28 21:14:17 +03:00
+								/* vsrv - Vector Shift Right Variable */
 								GEN_VXFORM(vsrv, 2, 28);
-												target-ppc: add vslv instruction

vslv: Vector Shift Left Variable

Signed-off-by: Vivek Andrew Sha <vivekandrewsha@gmail.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-28 21:14:16 +03:00
+								/* vslv - Vector Shift Left Variable */
 								GEN_VXFORM(vslv, 2, 29);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								/*
 								 * Translators for vslo, vsro, vaddcuw and vsubcuw instantiated through
 								 * GEN_VXFORM. The two numeric arguments are presumably the opc2/opc3
 								 * opcode fields -- TODO confirm against the GEN_VXFORM definition.
 								 */
 								GEN_VXFORM(vslo, 6, 16);
 								GEN_VXFORM(vsro, 6, 17);
 								GEN_VXFORM(vaddcuw, 0, 6);
 								GEN_VXFORM(vsubcuw, 0, 22);
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
-												target/ppc: move vs[lr][a][bhwd] to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-20-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Shared translator for VX-form instructions that map directly onto a
 								 * three-operand gvec expander.
 								 *
 								 * ctx:      translation context, checked by REQUIRE_VECTOR().
 								 * a:        decoded operands; vrt is the destination, vra and vrb the sources.
 								 * vece:     element size passed through to the expander.
 								 * gen_gvec: expander invoked with the register offsets of vrt, vra and vrb.
 								 *
 								 * Returns true once the expander has been called.
 								 */
 								static bool do_vector_gvec3_VX(DisasContext *ctx, arg_VX *a, int vece,
 								                               void (*gen_gvec)(unsigned, uint32_t, uint32_t,
 								                                                uint32_t, uint32_t, uint32_t))
 								{
 								    uint32_t dst_ofs, src_a_ofs, src_b_ofs;
 								    REQUIRE_VECTOR(ctx);
 								    dst_ofs = avr_full_offset(a->vrt);
 								    src_a_ofs = avr_full_offset(a->vra);
 								    src_b_ofs = avr_full_offset(a->vrb);
 								    /* Both size arguments are 16, i.e. the whole 128-bit register. */
 								    gen_gvec(vece, dst_ofs, src_a_ofs, src_b_ofs, 16, 16);
 								    return true;
 								}
 								/*
 								 * Per-element vector shifts, all routed through do_vector_gvec3_VX:
 								 * VSL* use tcg_gen_gvec_shlv, VSR* use tcg_gen_gvec_shrv and VSRA* use
 								 * tcg_gen_gvec_sarv, for MO_8/MO_16/MO_32/MO_64 element sizes. The MO_64
 								 * (doubleword) variants are gated on ALTIVEC_207 via TRANS_FLAGS2.
 								 */
 								TRANS_FLAGS(ALTIVEC, VSLB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_shlv);
 								TRANS_FLAGS(ALTIVEC, VSLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_shlv);
 								TRANS_FLAGS(ALTIVEC, VSLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_shlv);
 								TRANS_FLAGS2(ALTIVEC_207, VSLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_shlv);
 								TRANS_FLAGS(ALTIVEC, VSRB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_shrv);
 								TRANS_FLAGS(ALTIVEC, VSRH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_shrv);
 								TRANS_FLAGS(ALTIVEC, VSRW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_shrv);
 								TRANS_FLAGS2(ALTIVEC_207, VSRD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_shrv);
 								TRANS_FLAGS(ALTIVEC, VSRAB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_sarv);
 								TRANS_FLAGS(ALTIVEC, VSRAH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_sarv);
 								TRANS_FLAGS(ALTIVEC, VSRAW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_sarv);
 								TRANS_FLAGS2(ALTIVEC_207, VSRAD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_sarv);
-												target/ppc: move vrl[bhwd] to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-24-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Per-element vector rotates (VRLB/VRLH/VRLW/VRLD), expanded with
 								 * tcg_gen_gvec_rotlv through do_vector_gvec3_VX. The MO_64 variant is
 								 * gated on ALTIVEC_207 via TRANS_FLAGS2.
 								 */
 								TRANS_FLAGS(ALTIVEC, VRLB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_rotlv)
 								TRANS_FLAGS(ALTIVEC, VRLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_rotlv)
 								TRANS_FLAGS(ALTIVEC, VRLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_rotlv)
 								TRANS_FLAGS2(ALTIVEC_207, VRLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_rotlv)
-												target/ppc: move vrl[bhwd]nm/vrl[bhwd]mi to decodetree

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-25-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Build the per-element bit mask shared by the rotate-then-mask and
 								 * rotate-then-insert vector expansions.
 								 *
 								 * For every element of vrb a begin field b (element shifted right by 16)
 								 * and an end field e (element shifted right by 8) are extracted, each
 								 * limited to (8 << vece) - 1. The mask is (~0 >> b) ^ ((~0 >> e) >> 1), and
 								 * it is inverted in the elements where b > e.
 								 *
 								 * vece: element size of the operation.
 								 * vrb:  vector holding the b and e fields; it is only read.
 								 *
 								 * Returns a newly allocated vector temporary holding the mask; the callers
 								 * in this file release it with tcg_temp_free_vec().
 								 */
 								static TCGv_vec do_vrl_mask_vec(unsigned vece, TCGv_vec vrb)
 								{
 								    TCGv_vec t0 = tcg_temp_new_vec_matching(vrb),
 								             t1 = tcg_temp_new_vec_matching(vrb),
 								             t2 = tcg_temp_new_vec_matching(vrb),
 								             ones = tcg_constant_vec_matching(vrb, vece, -1);
 								    /* Extract b and e */
 								    /* t2 temporarily holds the field mask (8 << vece) - 1. */
 								    tcg_gen_dupi_vec(vece, t2, (8 << vece) - 1);
 								    tcg_gen_shri_vec(vece, t0, vrb, 16);
 								    tcg_gen_and_vec(vece, t0, t0, t2);
 								    tcg_gen_shri_vec(vece, t1, vrb, 8);
 								    tcg_gen_and_vec(vece, t1, t1, t2);
 								    /* Compare b and e to negate the mask where begin > end */
 								    /* t2 is reused for the compare result; presumably all-ones where b > e. */
 								    tcg_gen_cmp_vec(TCG_COND_GT, vece, t2, t0, t1);
 								    /* Create the mask with (~0 >> b) ^ ((~0 >> e) >> 1) */
 								    tcg_gen_shrv_vec(vece, t0, ones, t0);
 								    tcg_gen_shrv_vec(vece, t1, ones, t1);
 								    tcg_gen_shri_vec(vece, t1, t1, 1);
 								    tcg_gen_xor_vec(vece, t0, t0, t1);
 								    /* negate the mask */
 								    tcg_gen_xor_vec(vece, t0, t0, t2);
 								    /* t0 is returned to the caller, so only the scratch temps are freed. */
 								    tcg_temp_free_vec(t1);
 								    tcg_temp_free_vec(t2);
 								    return t0;
 								}
 								/*
 								 * Inline vector expansion of rotate-left-then-AND-with-mask.
 								 *
 								 * vece: element size of the operation.
 								 * vrt:  destination vector; it is overwritten with the masked rotation.
 								 * vra:  vector whose elements are rotated.
 								 * vrb:  vector supplying the rotate amount and, through
 								 *       do_vrl_mask_vec(), the mask.
 								 */
 								static void gen_vrlnm_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
 								                          TCGv_vec vrb)
 								{
 								    const int amount_mask = (8 << vece) - 1;
 								    TCGv_vec rot = tcg_temp_new_vec_matching(vrt);
 								    TCGv_vec keep = do_vrl_mask_vec(vece, vrb);
 								    /* Rotate amount: vrb limited to (8 << vece) - 1 in every element. */
 								    tcg_gen_dupi_vec(vece, rot, amount_mask);
 								    tcg_gen_and_vec(vece, rot, vrb, rot);
 								    /* Rotate vra, then clear every bit outside the mask. */
 								    tcg_gen_rotlv_vec(vece, vrt, vra, rot);
 								    tcg_gen_and_vec(vece, vrt, vrt, keep);
 								    tcg_temp_free_vec(rot);
 								    tcg_temp_free_vec(keep);
 								}
 								/*
 								 * Translate VRLWNM / VRLDNM, selected by vece.
 								 *
 								 * Picks the GVecGen3 descriptor matching the element size and expands the
 								 * instruction with tcg_gen_gvec_3() over the registers vrt, vra and vrb
 								 * (both size arguments are 16). Each descriptor pairs the inline vector
 								 * expansion gen_vrlnm_vec (.fniv) with a helper (.fno).
 								 *
 								 * Returns true after the expansion has been emitted; the REQUIRE_* macros
 								 * presumably return early when their condition is not met -- confirm
 								 * against their definitions.
 								 *
 								 * NOTE(review): this uses REQUIRE_VSX() whereas do_vector_gvec3_VX() in
 								 * this file uses REQUIRE_VECTOR() -- confirm this is intended.
 								 */
 								static bool do_vrlnm(DisasContext *ctx, arg_VX *a, int vece)
 								{
 								    /* Vector opcodes handed to the expander through .opt_opc. */
 								    static const TCGOpcode vecop_list[] = {
 								        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
 								        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
 								    };
 								    /* ops[0] is the word (MO_32) variant, ops[1] the doubleword (MO_64) one. */
 								    static const GVecGen3 ops[2] = {
 								        {
 								            .fniv = gen_vrlnm_vec,
 								            .fno = gen_helper_VRLWNM,
 								            .opt_opc = vecop_list,
 								            .load_dest = true,
 								            .vece = MO_32
 								        },
 								        {
 								            .fniv = gen_vrlnm_vec,
 								            .fno = gen_helper_VRLDNM,
 								            .opt_opc = vecop_list,
 								            .load_dest = true,
 								            .vece = MO_64
 								        }
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
 								    REQUIRE_VSX(ctx);
 								    /*
 								     * The index assumes MO_32 == 2 and MO_64 == 3, the only values passed
 								     * by the TRANS() users below.
 								     */
 								    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
 								                   avr_full_offset(a->vrb), 16, 16, &ops[vece - 2]);
 								    return true;
 								}
 								TRANS(VRLWNM, do_vrlnm, MO_32)
 								TRANS(VRLDNM, do_vrlnm, MO_64)
 								/*
 								 * Inline vector expansion of rotate-left-then-insert-under-mask.
 								 *
 								 * vece: element size of the operation.
 								 * vrt:  destination vector; it is also passed to the bit select as an
 								 *       input, so its previous contents take part in the result.
 								 * vra:  vector whose elements are rotated.
 								 * vrb:  vector supplying the rotate amount and, through
 								 *       do_vrl_mask_vec(), the mask.
 								 */
 								static void gen_vrlmi_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
 								                          TCGv_vec vrb)
 								{
 								    const int amount_mask = (8 << vece) - 1;
 								    TCGv_vec rot = tcg_temp_new_vec_matching(vrt);
 								    TCGv_vec rotated = tcg_temp_new_vec_matching(vrt);
 								    TCGv_vec select = do_vrl_mask_vec(vece, vrb);
 								    /* Rotate amount: vrb limited to (8 << vece) - 1 in every element. */
 								    tcg_gen_dupi_vec(vece, rot, amount_mask);
 								    tcg_gen_and_vec(vece, rot, vrb, rot);
 								    /* Rotate into a scratch vector so that vrt stays intact for the select. */
 								    tcg_gen_rotlv_vec(vece, rotated, vra, rot);
 								    /* Combine the rotated value and the old vrt under control of the mask. */
 								    tcg_gen_bitsel_vec(vece, vrt, select, rotated, vrt);
 								    tcg_temp_free_vec(rot);
 								    tcg_temp_free_vec(rotated);
 								    tcg_temp_free_vec(select);
 								}
 								/*
 								 * Translate VRLWMI / VRLDMI, selected by vece.
 								 *
 								 * Picks the GVecGen3 descriptor matching the element size and expands the
 								 * instruction with tcg_gen_gvec_3() over the registers vrt, vra and vrb
 								 * (both size arguments are 16). Each descriptor pairs the inline vector
 								 * expansion gen_vrlmi_vec (.fniv) with a helper (.fno); .load_dest is set
 								 * because the previous contents of vrt take part in the result.
 								 *
 								 * Returns true after the expansion has been emitted; the REQUIRE_* macros
 								 * presumably return early when their condition is not met -- confirm
 								 * against their definitions.
 								 */
 								static bool do_vrlmi(DisasContext *ctx, arg_VX *a, int vece)
 								{
 								    /* Vector opcodes handed to the expander through .opt_opc. */
 								    static const TCGOpcode vecop_list[] = {
 								        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
 								        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
 								    };
 								    /* ops[0] is the word (MO_32) variant, ops[1] the doubleword (MO_64) one. */
 								    static const GVecGen3 ops[2] = {
 								        {
 								            .fniv = gen_vrlmi_vec,
 								            .fno = gen_helper_VRLWMI,
 								            .opt_opc = vecop_list,
 								            .load_dest = true,
 								            .vece = MO_32
 								        },
 								        {
 								            /*
 								             * Must be the insert expansion, like the MO_32 entry: using
 								             * gen_vrlnm_vec here would rotate-and-mask and discard the
 								             * destination bits outside the mask, disagreeing with the
 								             * VRLDMI helper.
 								             */
 								            .fniv = gen_vrlmi_vec,
 								            .fno = gen_helper_VRLDMI,
 								            .opt_opc = vecop_list,
 								            .load_dest = true,
 								            .vece = MO_64
 								        }
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
 								    REQUIRE_VSX(ctx);
 								    /*
 								     * The index assumes MO_32 == 2 and MO_64 == 3, the only values passed
 								     * by the TRANS() users below.
 								     */
 								    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
 								                   avr_full_offset(a->vrb), 16, 16, &ops[vece - 2]);
 								    return true;
 								}
 								TRANS(VRLWMI, do_vrlmi, MO_32)
 								TRANS(VRLDMI, do_vrlmi, MO_64)
-												target/ppc: implement vsraq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-23-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								static bool do_vector_shift_quad(DisasContext *ctx, arg_VX *a, bool right,
 								                                 bool alg)
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								{
-												target/ppc: implement vsraq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-23-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    TCGv_i64 hi, lo, t0, t1, n, zero = tcg_constant_i64(0);
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								    REQUIRE_VECTOR(ctx);
 								    n = tcg_temp_new_i64();
 								    hi = tcg_temp_new_i64();
 								    lo = tcg_temp_new_i64();
 								    t0 = tcg_temp_new_i64();
-												target/ppc: implement vsraq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-23-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    t1 = tcg_const_i64(0);
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								    get_avr64(lo, a->vra, false);
 								    get_avr64(hi, a->vra, true);
 								    get_avr64(n, a->vrb, true);
 								    tcg_gen_andi_i64(t0, n, 64);
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    if (right) {
 								        tcg_gen_movcond_i64(TCG_COND_NE, lo, t0, zero, hi, lo);
-												target/ppc: implement vsraq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-23-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								        if (alg) {
 								            tcg_gen_sari_i64(t1, lo, 63);
 								        }
 								        tcg_gen_movcond_i64(TCG_COND_NE, hi, t0, zero, t1, hi);
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    } else {
 								        tcg_gen_movcond_i64(TCG_COND_NE, hi, t0, zero, lo, hi);
 								        tcg_gen_movcond_i64(TCG_COND_NE, lo, t0, zero, zero, lo);
 								    }
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_gen_andi_i64(n, n, 0x3F);
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    if (right) {
-												target/ppc: implement vsraq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-23-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								        if (alg) {
 								            tcg_gen_sar_i64(t0, hi, n);
 								        } else {
 								            tcg_gen_shr_i64(t0, hi, n);
 								        }
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    } else {
 								        tcg_gen_shl_i64(t0, lo, n);
 								    }
 								    set_avr64(a->vrt, t0, right);
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    if (right) {
 								        tcg_gen_shr_i64(lo, lo, n);
 								    } else {
 								        tcg_gen_shl_i64(hi, hi, n);
 								    }
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_gen_xori_i64(n, n, 63);
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    if (right) {
 								        tcg_gen_shl_i64(hi, hi, n);
 								        tcg_gen_shli_i64(hi, hi, 1);
 								    } else {
 								        tcg_gen_shr_i64(lo, lo, n);
 								        tcg_gen_shri_i64(lo, lo, 1);
 								    }
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_gen_or_i64(hi, hi, lo);
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    set_avr64(a->vrt, hi, !right);
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								    tcg_temp_free_i64(hi);
 								    tcg_temp_free_i64(lo);
 								    tcg_temp_free_i64(t0);
-												target/ppc: implement vsraq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-23-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_temp_free_i64(t1);
-												target/ppc: implement vslq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-21-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_temp_free_i64(n);
 								    return true;
 								}
-												target/ppc: implement vsraq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-23-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								TRANS_FLAGS2(ISA310, VSLQ, do_vector_shift_quad, false, false);
 								TRANS_FLAGS2(ISA310, VSRQ, do_vector_shift_quad, true, false);
 								TRANS_FLAGS2(ISA310, VSRAQ, do_vector_shift_quad, true, true);
-												target/ppc: implement vsrq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-22-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								static void do_vrlq_mask(TCGv_i64 mh, TCGv_i64 ml, TCGv_i64 b, TCGv_i64 e)
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								{
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    TCGv_i64 th, tl, t0, t1, zero = tcg_constant_i64(0),
 								             ones = tcg_constant_i64(-1);
 								    th = tcg_temp_new_i64();
 								    tl = tcg_temp_new_i64();
 								    t0 = tcg_temp_new_i64();
 								    t1 = tcg_temp_new_i64();
 								    /*
 								     * m = ~0 >> b, as a 128-bit value held in mh:ml.
 								     * (b & 64) selects the high doubleword before the sub-64 shift:
 								     * zero when the bit is set, all-ones otherwise.
 								     */
 								    tcg_gen_andi_i64(t0, b, 64);
 								    tcg_gen_movcond_i64(TCG_COND_NE, t1, t0, zero, zero, ones);
 								    /* Shift both doublewords right by the low six bits of b. */
 								    tcg_gen_andi_i64(t0, b, 0x3F);
 								    tcg_gen_shr_i64(mh, t1, t0);
 								    tcg_gen_shr_i64(ml, ones, t0);
 								    /*
 								     * OR the bits that left the high doubleword into ml.  The left
 								     * shift by 64 - (b & 0x3F) is issued as a shift by (b & 0x3F) ^ 63
 								     * followed by a shift by 1, so each shift count stays in 0..63.
 								     */
 								    tcg_gen_xori_i64(t0, t0, 63);
 								    tcg_gen_shl_i64(t1, t1, t0);
 								    tcg_gen_shli_i64(t1, t1, 1);
 								    tcg_gen_or_i64(ml, t1, ml);
 								    /* t = ~0 >> e, built in th:tl with the same sequence as m above. */
 								    tcg_gen_andi_i64(t0, e, 64);
 								    tcg_gen_movcond_i64(TCG_COND_NE, t1, t0, zero, zero, ones);
 								    tcg_gen_andi_i64(t0, e, 0x3F);
 								    tcg_gen_shr_i64(th, t1, t0);
 								    tcg_gen_shr_i64(tl, ones, t0);
 								    tcg_gen_xori_i64(t0, t0, 63);
 								    tcg_gen_shl_i64(t1, t1, t0);
 								    tcg_gen_shli_i64(t1, t1, 1);
 								    tcg_gen_or_i64(tl, t1, tl);
 								    /* t = t >> 1 (128-bit shift of th:tl, done by the next two ops) */
-												target/ppc: use extract/extract2 to create vrlqnm mask

Fixes: 4e272668406b ("target/ppc: implement vrlqnm")
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220304175156.2012315-4-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-05 09:16:47 +03:00
+								    tcg_gen_extract2_i64(tl, tl, th, 1);
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_gen_shri_i64(th, th, 1);
 								    /* m = m ^ t, applied to each doubleword of the 128-bit pair */
 								    tcg_gen_xor_i64(mh, mh, th);
 								    tcg_gen_xor_i64(ml, ml, tl);
 								    /*
 								     * Negate the mask if begin > end: t0 becomes all-ones when b > e
 								     * (TCG_COND_GT) and zero otherwise, so the XORs below invert
 								     * mh:ml only in that case.
 								     */
 								    tcg_gen_movcond_i64(TCG_COND_GT, t0, b, e, ones, zero);
 								    tcg_gen_xor_i64(mh, mh, t0);
 								    tcg_gen_xor_i64(ml, ml, t0);
 								    /* Release the scratch temporaries; the result stays in mh and ml. */
 								    tcg_temp_free_i64(th);
 								    tcg_temp_free_i64(tl);
 								    tcg_temp_free_i64(t0);
 								    tcg_temp_free_i64(t1);
 								}
-												target/ppc: implement vrlqmi

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-28-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask,
 								                                bool insert)
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								{
 								    TCGv_i64 ah, al, vrb, n, t0, t1, zero = tcg_constant_i64(0);
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								    REQUIRE_VECTOR(ctx);
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    ah = tcg_temp_new_i64();
 								    al = tcg_temp_new_i64();
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    vrb = tcg_temp_new_i64();
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    n = tcg_temp_new_i64();
 								    t0 = tcg_temp_new_i64();
 								    t1 = tcg_temp_new_i64();
 								    get_avr64(ah, a->vra, true);
 								    get_avr64(al, a->vra, false);
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    get_avr64(vrb, a->vrb, true);
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								    tcg_gen_mov_i64(t0, ah);
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_gen_andi_i64(t1, vrb, 64);
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_gen_movcond_i64(TCG_COND_NE, ah, t1, zero, al, ah);
 								    tcg_gen_movcond_i64(TCG_COND_NE, al, t1, zero, t0, al);
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_gen_andi_i64(n, vrb, 0x3F);
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								    /*
 								     * Rotate the 128-bit value ah:al left by n (n was masked with 0x3F
 								     * just before this point), producing the result in t0 (high) and
 								     * t1 (low):
 								     *   t0 = (ah << n) | (al >> (64 - n))
 								     *   t1 = (al << n) | (ah >> (64 - n))
 								     * The right shift by 64 - n is issued as a shift by n ^ 63 followed
 								     * by a shift by 1, so each shift count stays in 0..63.
 								     */
 								    tcg_gen_shl_i64(t0, ah, n);
 								    tcg_gen_shl_i64(t1, al, n);
 								    /* From here on n holds n ^ 63, i.e. 63 - n for n in 0..63. */
 								    tcg_gen_xori_i64(n, n, 63);
 								    tcg_gen_shr_i64(al, al, n);
 								    tcg_gen_shri_i64(al, al, 1);
 								    tcg_gen_or_i64(t0, al, t0);
 								    tcg_gen_shr_i64(ah, ah, n);
 								    tcg_gen_shri_i64(ah, ah, 1);
 								    tcg_gen_or_i64(t1, ah, t1);
-												target/ppc: implement vrlqmi

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-28-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    if (mask || insert) {
-												target/ppc: use extract/extract2 to create vrlqnm mask

Fixes: 4e272668406b ("target/ppc: implement vrlqnm")
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220304175156.2012315-4-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-05 09:16:47 +03:00
+								        tcg_gen_extract_i64(n, vrb, 8, 7);
 								        tcg_gen_extract_i64(vrb, vrb, 16, 7);
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								        do_vrlq_mask(ah, al, vrb, n);
 								        tcg_gen_and_i64(t0, t0, ah);
 								        tcg_gen_and_i64(t1, t1, al);
-												target/ppc: implement vrlqmi

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-28-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								        if (insert) {
 								            get_avr64(n, a->vrt, true);
 								            get_avr64(vrb, a->vrt, false);
-												target/ppc: use andc in vrlqmi

Fixes: 7e5947df6e94 ("target/ppc: implement vrlqmi")
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220304175156.2012315-5-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-05 09:16:47 +03:00
+								            tcg_gen_andc_i64(n, n, ah);
 								            tcg_gen_andc_i64(vrb, vrb, al);
-												target/ppc: implement vrlqmi

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-28-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								            tcg_gen_or_i64(t0, t0, n);
 								            tcg_gen_or_i64(t1, t1, vrb);
 								        }
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    }
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    set_avr64(a->vrt, t0, true);
 								    set_avr64(a->vrt, t1, false);
 								    tcg_temp_free_i64(ah);
 								    tcg_temp_free_i64(al);
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_temp_free_i64(vrb);
-												target/ppc: implement vrlq

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-26-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_temp_free_i64(n);
 								    tcg_temp_free_i64(t0);
 								    tcg_temp_free_i64(t1);
 								    return true;
 								}
-												target/ppc: implement vrlqmi

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-28-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								TRANS(VRLQ, do_vector_rotl_quad, false, false)
 								TRANS(VRLQNM, do_vector_rotl_quad, true, false)
 								TRANS(VRLQMI, do_vector_rotl_quad, false, true)
-												target/ppc: implement vrlqnm

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-27-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
+								#define GEN_VXFORM_SAT(NAME, VECE, NORM, SAT, OPC2, OPC3)               \
 								static void glue(glue(gen_, NAME), _vec)(unsigned vece, TCGv_vec t,     \
 								                                         TCGv_vec sat, TCGv_vec a,      \
 								                                         TCGv_vec b)                    \
 								{                                                                       \
 								    TCGv_vec x = tcg_temp_new_vec_matching(t);                          \
 								    glue(glue(tcg_gen_, NORM), _vec)(VECE, x, a, b);                    \
 								    glue(glue(tcg_gen_, SAT), _vec)(VECE, t, a, b);                     \
 								    tcg_gen_cmp_vec(TCG_COND_NE, VECE, x, x, t);                        \
 								    tcg_gen_or_vec(VECE, sat, sat, x);                                  \
 								    tcg_temp_free_vec(x);                                               \
 								}                                                                       \
 								static void glue(gen_, NAME)(DisasContext *ctx)                         \
 								{                                                                       \
-												tcg: Specify optional vector requirements with a list

Replace the single opcode in .opc with a null-terminated
array in .opt_opc.  We still require that all opcodes be
used with the same .vece.

Validate the contents of this list with CONFIG_DEBUG_TCG.
All tcg_gen_*_vec functions will check any list active
during .fniv expansion.  Swap the active list in and out
as we expand other opcodes, or take control away from the
front-end function.

Convert all existing vector aware front ends.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

											
										
										
											2019-03-17 03:27:29 +03:00
+								    static const TCGOpcode vecop_list[] = {                             \
 								        glue(glue(INDEX_op_, NORM), _vec),                              \
 								        glue(glue(INDEX_op_, SAT), _vec),                               \
 								        INDEX_op_cmp_vec, 0                                             \
 								    };                                                                  \
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
+								    static const GVecGen4 g = {                                         \
 								        .fniv = glue(glue(gen_, NAME), _vec),                           \
 								        .fno = glue(gen_helper_, NAME),                                 \
-												tcg: Specify optional vector requirements with a list

Replace the single opcode in .opc with a null-terminated
array in .opt_opc.  We still require that all opcodes be
used with the same .vece.

Validate the contents of this list with CONFIG_DEBUG_TCG.
All tcg_gen_*_vec functions will check any list active
during .fniv expansion.  Swap the active list in and out
as we expand other opcodes, or take control away from the
front-end function.

Convert all existing vector aware front ends.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

											
										
										
											2019-03-17 03:27:29 +03:00
+								        .opt_opc = vecop_list,                                          \
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
+								        .write_aofs = true,                                             \
 								        .vece = VECE,                                                   \
 								    };                                                                  \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
-												target/ppc: introduce avr_full_offset() function

All TCG vector operations require pointers to the base address of the vector
rather than separate access to the top and bottom 64-bits. Convert the VMX TCG
instructions to use a new avr_full_offset() function instead of avr64_offset()
which can then itself be written as a simple wrapper onto vsr_full_offset().

This same function can also be reused in cpu_avr_ptr() to avoid having more than
one copy of the offset calculation logic.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-Id: <20190307180520.13868-5-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-03-07 21:05:17 +03:00
+								    tcg_gen_gvec_4(avr_full_offset(rD(ctx->opcode)),                    \
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
+								                   offsetof(CPUPPCState, vscr_sat),                     \
-												target/ppc: introduce avr_full_offset() function

All TCG vector operations require pointers to the base address of the vector
rather than separate access to the top and bottom 64-bits. Convert the VMX TCG
instructions to use a new avr_full_offset() function instead of avr64_offset()
which can then itself be written as a simple wrapper onto vsr_full_offset().

This same function can also be reused in cpu_avr_ptr() to avoid having more than
one copy of the offset calculation logic.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-Id: <20190307180520.13868-5-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-03-07 21:05:17 +03:00
+								                   avr_full_offset(rA(ctx->opcode)),                    \
 								                   avr_full_offset(rB(ctx->opcode)),                    \
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
+, 16, &g);                                         \
 								}
 								GEN_VXFORM_SAT(vaddubs, MO_8, add, usadd, 0, 8);
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								GEN_VXFORM_DUAL_EXT(vaddubs, PPC_ALTIVEC, PPC_NONE, 0,       \
 								                    vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800)
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
+								GEN_VXFORM_SAT(vadduhs, MO_16, add, usadd, 0, 9);
-												target-ppc: add vmul10[u,eu,cu,ecu]q instructions

vmul10uq  : Vector Multiply-by-10 Unsigned Quadword VX-form
vmul10euq : Vector Multiply-by-10 Extended Unsigned Quadword VX-form
vmul10cuq : Vector Multiply-by-10 & write Carry Unsigned Quadword VX-form
vmul10ecuq: Vector Multiply-by-10 Extended & write Carry Unsigned Quadword VX-form

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
[ Add GEN_VXFORM_DUAL_EXT with invalid bit mask ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-24 12:14:58 +03:00
+								GEN_VXFORM_DUAL(vadduhs, PPC_ALTIVEC, PPC_NONE, \
 								                vmul10euq, PPC_NONE, PPC2_ISA300)
-												target/ppc: convert vadd*s and vsub*s to vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-17-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:57 +03:00
+								GEN_VXFORM_SAT(vadduws, MO_32, add, usadd, 0, 10);
 								GEN_VXFORM_SAT(vaddsbs, MO_8, add, ssadd, 0, 12);
 								GEN_VXFORM_SAT(vaddshs, MO_16, add, ssadd, 0, 13);
 								GEN_VXFORM_SAT(vaddsws, MO_32, add, ssadd, 0, 14);
 								GEN_VXFORM_SAT(vsububs, MO_8, sub, ussub, 0, 24);
 								GEN_VXFORM_SAT(vsubuhs, MO_16, sub, ussub, 0, 25);
 								GEN_VXFORM_SAT(vsubuws, MO_32, sub, ussub, 0, 26);
 								GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28);
 								GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29);
 								GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								/*
 								 * Quadword add/subtract family.  The extended forms (vaddeuqm/vaddecuq
 								 * and vsubeuqm/vsubecuq) are each generated with GEN_VXFORM3 and then
 								 * paired through GEN_VXFORM_DUAL, both halves gated on
 								 * PPC2_ALTIVEC_207.  GEN_VXFORM, GEN_VXFORM3 and GEN_VXFORM_DUAL are
 								 * defined outside this excerpt.
 								 */
 								GEN_VXFORM(vadduqm, 0, 4);
 								GEN_VXFORM(vaddcuq, 0, 5);
 								GEN_VXFORM3(vaddeuqm, 30, 0);
 								GEN_VXFORM3(vaddecuq, 30, 0);
 								GEN_VXFORM_DUAL(vaddeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
 								            vaddecuq, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM(vsubuqm, 0, 20);
 								GEN_VXFORM(vsubcuq, 0, 21);
 								GEN_VXFORM3(vsubeuqm, 31, 0);
 								GEN_VXFORM3(vsubecuq, 31, 0);
 								GEN_VXFORM_DUAL(vsubeuqm, PPC_NONE, PPC2_ALTIVEC_207, \
 								            vsubecuq, PPC_NONE, PPC2_ALTIVEC_207)
-												target/ppc: Optimize emulation of vsl and vsr instructions

Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.

For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.

For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:48 +03:00
+								/*
 								 * vsl / vsr (128-bit shift left / right) are instantiated through
 								 * GEN_VXFORM_TRANS, which is defined outside this excerpt; presumably it
 								 * wraps an inline TCG implementation behind the Altivec-enabled check --
 								 * TODO confirm against the macro definition.
 								 */
 								GEN_VXFORM_TRANS(vsl, 2, 7);
 								GEN_VXFORM_TRANS(vsr, 2, 11);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								/*
 								 * Pack (vpk*), sum-across (vsum*) and single-precision add / subtract /
 								 * max / min translators.  GEN_VXFORM_ENV and GEN_VXFORM are defined
 								 * outside this excerpt; presumably the _ENV flavour passes cpu_env to
 								 * the helper, as GEN_VXFORM_NOA_ENV does further down -- TODO confirm.
 								 */
 								GEN_VXFORM_ENV(vpkuhum, 7, 0);
 								GEN_VXFORM_ENV(vpkuwum, 7, 1);
 								GEN_VXFORM_ENV(vpkudum, 7, 17);
 								GEN_VXFORM_ENV(vpkuhus, 7, 2);
 								GEN_VXFORM_ENV(vpkuwus, 7, 3);
 								GEN_VXFORM_ENV(vpkudus, 7, 19);
 								GEN_VXFORM_ENV(vpkshus, 7, 4);
 								GEN_VXFORM_ENV(vpkswus, 7, 5);
 								GEN_VXFORM_ENV(vpksdus, 7, 21);
 								GEN_VXFORM_ENV(vpkshss, 7, 6);
 								GEN_VXFORM_ENV(vpkswss, 7, 7);
 								GEN_VXFORM_ENV(vpksdss, 7, 23);
 								GEN_VXFORM(vpkpx, 7, 12);
 								GEN_VXFORM_ENV(vsum4ubs, 4, 24);
 								GEN_VXFORM_ENV(vsum4sbs, 4, 28);
 								GEN_VXFORM_ENV(vsum4shs, 4, 25);
 								GEN_VXFORM_ENV(vsum2sws, 4, 26);
 								GEN_VXFORM_ENV(vsumsws, 4, 30);
 								GEN_VXFORM_ENV(vaddfp, 5, 0);
 								GEN_VXFORM_ENV(vsubfp, 5, 1);
 								GEN_VXFORM_ENV(vmaxfp, 5, 16);
 								GEN_VXFORM_ENV(vminfp, 5, 17);
-												target-ppc: add vextu[bhw][lr]x instructions

vextublx: Vector Extract Unsigned Byte Left
vextuhlx: Vector Extract Unsigned Halfword Left
vextuwlx: Vector Extract Unsigned Word Left
vextubrx: Vector Extract Unsigned Byte Right-Indexed VX-form
vextuhrx: Vector Extract Unsigned  Halfword Right-Indexed VX-form
vextuwrx: Vector Extract Unsigned Word Right-Indexed VX-form

Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Hariharan T.S. <hari@linux.vnet.ibm.com>
[ implement using int128_rshift ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-28 10:56:42 +03:00
+								/*
 								 * Vector Extract Unsigned Byte / Halfword / Word Left-Indexed.
 								 * GEN_VXFORM_HETRO is defined outside this excerpt.
 								 */
 								GEN_VXFORM_HETRO(vextublx, 6, 24)
 								GEN_VXFORM_HETRO(vextuhlx, 6, 25)
 								GEN_VXFORM_HETRO(vextuwlx, 6, 26)
-												target/ppc: Refactor emulation of vmrgew and vmrgow instructions

Since I found these two instructions implemented with tcg, I refactored
them so they are consistent with other similar implementations that
I introduced in this patch.

Also, a new dual macro GEN_VXFORM_TRANS_DUAL is added. This macro is
used if one instruction is realized with direct translation, and second
one with a helper.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1566898663-25858-4-git-send-email-stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-08-27 12:37:43 +03:00
+								/*
 								 * vmrgow (PPC2_ALTIVEC_207) is paired with vextuwlx (PPC2_ISA300).
 								 * GEN_VXFORM_TRANS_DUAL is defined outside this excerpt; it is meant for
 								 * pairs where one instruction is translated directly and the other goes
 								 * through a helper.  The right-indexed vector extracts follow.
 								 */
 								GEN_VXFORM_TRANS_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
-												target-ppc: add vextu[bhw][lr]x instructions

vextublx: Vector Extract Unsigned Byte Left
vextuhlx: Vector Extract Unsigned Halfword Left
vextuwlx: Vector Extract Unsigned Word Left
vextubrx: Vector Extract Unsigned Byte Right-Indexed VX-form
vextuhrx: Vector Extract Unsigned  Halfword Right-Indexed VX-form
vextuwrx: Vector Extract Unsigned Word Right-Indexed VX-form

Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Hariharan T.S. <hari@linux.vnet.ibm.com>
[ implement using int128_rshift ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-28 10:56:42 +03:00
+								                vextuwlx, PPC_NONE, PPC2_ISA300)
 								GEN_VXFORM_HETRO(vextubrx, 6, 28)
 								GEN_VXFORM_HETRO(vextuhrx, 6, 29)
 								GEN_VXFORM_HETRO(vextuwrx, 6, 30)
-												target/ppc: Optimize emulation of lvsl and lvsr instructions

Adding simple macro that is calling tcg implementation of appropriate
instruction if altivec support is active.

Optimization of altivec instruction lvsl (Load Vector for Shift Left).
Place bytes sh:sh+15 of value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F
in destination register. Sh is calculated by adding 2 source registers and
getting bits 60-63 of result.

First, the bits [28-31] are placed from EA to variable sh. After that,
the bytes are created in the following way:
sh:(sh+7) of X(from description) by multiplying sh with 0x0101010101010101
followed by addition of the result with 0x0001020304050607. Value obtained
is placed in higher doubleword element of vD.
(sh+8):(sh+15) by adding the result of previous multiplication with
0x08090a0b0c0d0e0f. Value obtained is placed in lower doubleword element
of vD.

Optimization of altivec instruction lvsr (Load Vector for Shift Right).
Place bytes 16-sh:31-sh of value 0x00 || 0x01 || 0x02 || ... || 0x1E ||
0x1F in destination register. Sh is calculated by adding 2 source
registers and getting bits 60-63 of result.

First, the bits [28-31] are placed from EA to variable sh. After that,
the bytes are created in the following way:
sh:(sh+7) of X(from description) by multiplying sh with 0x0101010101010101
followed by subtraction of the result from 0x1011121314151617. Value
obtained is placed in higher doubleword element of vD.
(sh+8):(sh+15) by subtracting the result of previous multiplication from
0x18191a1b1c1d1e1f. Value obtained is placed in lower doubleword element
of vD.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-2-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:47 +03:00
+								/*
 								 * lvsl / lvsr (Load Vector for Shift Left / Right) are instantiated
 								 * through GEN_VXFORM_TRANS, which is defined outside this excerpt;
 								 * presumably it calls the TCG implementation of the instruction when
 								 * Altivec is enabled -- TODO confirm against the macro definition.
 								 */
 								GEN_VXFORM_TRANS(lvsl, 6, 31)
 								GEN_VXFORM_TRANS(lvsr, 6, 32)
-												target/ppc: Refactor emulation of vmrgew and vmrgow instructions

Since I found these two instructions implemented with tcg, I refactored
them so they are consistent with other similar implementations that
I introduced in this patch.

Also, a new dual macro GEN_VXFORM_TRANS_DUAL is added. This macro is
used if one instruction is realized with direct translation, and second
one with a helper.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Message-Id: <1566898663-25858-4-git-send-email-stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-08-27 12:37:43 +03:00
+								/*
 								 * vmrgew (PPC2_ALTIVEC_207) is paired with vextuwrx (PPC2_ISA300).
 								 * GEN_VXFORM_TRANS_DUAL is defined outside this excerpt; it is meant for
 								 * pairs where one instruction is translated directly and the other goes
 								 * through a helper.
 								 */
 								GEN_VXFORM_TRANS_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207,
-												target-ppc: add vextu[bhw][lr]x instructions

vextublx: Vector Extract Unsigned Byte Left
vextuhlx: Vector Extract Unsigned Halfword Left
vextuwlx: Vector Extract Unsigned Word Left
vextubrx: Vector Extract Unsigned Byte Right-Indexed VX-form
vextuhrx: Vector Extract Unsigned  Halfword Right-Indexed VX-form
vextuwrx: Vector Extract Unsigned Word Right-Indexed VX-form

Signed-off-by: Avinesh Kumar <avinesku@linux.vnet.ibm.com>
Signed-off-by: Hariharan T.S. <hari@linux.vnet.ibm.com>
[ implement using int128_rshift ]
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-28 10:56:42 +03:00
+								                vextuwrx, PPC_NONE, PPC2_ISA300)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								/*
 								 * GEN_VXRFORM1 - define gen_<name>(), a translator that forwards a
 								 * three-register vector operation to gen_helper_<opname>().
 								 *
 								 * The generated function raises POWERPC_EXCP_VPU and emits nothing else
 								 * when ctx->altivec_enabled is clear.  Otherwise it obtains pointers to
 								 * the rA, rB and rD vector registers named by the opcode, calls the
 								 * helper as (cpu_env, rD, rA, rB) and frees the pointer temporaries.
 								 *
 								 * str, opc2 and opc3 are accepted but not used by this expansion.
 								 */
 								#define GEN_VXRFORM1(opname, name, str, opc2, opc3)                     \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    TCGv_ptr src_a, src_b, dst;                                         \
 								                                                                        \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    src_a = gen_avr_ptr(rA(ctx->opcode));                               \
 								    src_b = gen_avr_ptr(rB(ctx->opcode));                               \
 								    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
 								    gen_helper_##opname(cpu_env, dst, src_a, src_b);                    \
 								    tcg_temp_free_ptr(src_a);                                           \
 								    tcg_temp_free_ptr(src_b);                                           \
 								    tcg_temp_free_ptr(dst);                                             \
 								}
 								/*
 								 * GEN_VXRFORM - emit both translators of a VXR-form instruction:
 								 * gen_<name>() backed by gen_helper_<name>, and gen_<name>_() (the "."
 								 * form) backed by gen_helper_<name>_dot, whose opc3 has bit 4 set.
 								 */
 								#define GEN_VXRFORM(name, opc2, opc3)                                \
 								    GEN_VXRFORM1(name, name, #name, opc2, opc3)                      \
 								    GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4)))
 								/*
 								 * GEN_VXRFORM_DUAL - one translator for two instructions that share an
 								 * opcode slot.
 								 *
 								 * Some Altivec instructions use bit 31 (Rc) as an opcode bit while also
 								 * using bit 21 as the actual Rc bit.  In general, these pairs come from
 								 * different versions of the ISA, so each instruction carries its own
 								 * pair of flag masks (insns_flags, insns_flags2).
 								 *
 								 * gen_<name0>_<name1>() selects name0 when Rc is 0 and name1 when Rc is
 								 * 1, provided the matching flag test passes; Rc21 then chooses between
 								 * the plain translator and the "_"-suffixed one.  Every other
 								 * combination raises POWERPC_EXCP_INVAL_INVAL.
 								 */
 								#define GEN_VXRFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)     \
 								static void glue(gen_, name0##_##name1)(DisasContext *ctx)             \
 								{                                                                      \
 								    if (Rc(ctx->opcode) == 0 &&                                        \
 								        ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \
 								        if (Rc21(ctx->opcode) != 0) {                                  \
 								            gen_##name0##_(ctx);                                       \
 								        } else {                                                       \
 								            gen_##name0(ctx);                                          \
 								        }                                                              \
 								        return;                                                        \
 								    }                                                                  \
 								    if (Rc(ctx->opcode) == 1 &&                                        \
 								        ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \
 								        if (Rc21(ctx->opcode) != 0) {                                  \
 								            gen_##name1##_(ctx);                                       \
 								        } else {                                                       \
 								            gen_##name1(ctx);                                          \
 								        }                                                              \
 								        return;                                                        \
 								    }                                                                  \
 								    gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);                \
 								}
-												target/ppc: Move Vector Compare Equal/Not Equal/Greater Than to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-10-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Derive CR field 6 from the compare result stored in vector register
 								 * @vrt: the field becomes 8 when both doublewords of the register are all
 								 * ones, 2 when both are zero, and 0 otherwise.
 								 */
 								static void do_vcmp_rc(int vrt)
 								{
 								    TCGv_i64 dword = tcg_temp_new_i64();
 								    TCGv_i64 all_set = tcg_temp_new_i64();
 								    TCGv_i64 all_clr = tcg_temp_new_i64();
 								    /* all_clr = OR of both doublewords, all_set = AND of both. */
 								    get_avr64(dword, vrt, true);
 								    tcg_gen_mov_i64(all_set, dword);
 								    get_avr64(dword, vrt, false);
 								    tcg_gen_or_i64(all_clr, all_set, dword);
 								    tcg_gen_and_i64(all_set, all_set, dword);
 								    /* OR == 0: no element matched, reported in bit 1. */
 								    tcg_gen_setcondi_i64(TCG_COND_EQ, all_clr, all_clr, 0);
 								    tcg_gen_shli_i64(all_clr, all_clr, 1);
 								    /* AND == -1: every element matched, reported in bit 3. */
 								    tcg_gen_setcondi_i64(TCG_COND_EQ, all_set, all_set, -1);
 								    tcg_gen_shli_i64(all_set, all_set, 3);
 								    /* Merge both flags and store the low 32 bits into CR6. */
 								    tcg_gen_or_i64(dword, all_set, all_clr);
 								    tcg_gen_extrl_i64_i32(cpu_crf[6], dword);
 								    tcg_temp_free_i64(dword);
 								    tcg_temp_free_i64(all_set);
 								    tcg_temp_free_i64(all_clr);
 								}
 								/*
 								 * Element-wise vector compare: vrt = (vra <cond> vrb) for elements of
 								 * size @vece across the 16-byte registers, using tcg_gen_gvec_cmp().
 								 * When a->rc is set, CR field 6 is updated from the result through
 								 * do_vcmp_rc().
 								 *
 								 * REQUIRE_VECTOR is defined elsewhere; presumably it raises the
 								 * vector-unavailable exception and returns early -- TODO confirm.
 								 */
 								static bool do_vcmp(DisasContext *ctx, arg_VC *a, TCGCond cond, int vece)
 								{
 								    int dst_ofs, lhs_ofs, rhs_ofs;
 								    REQUIRE_VECTOR(ctx);
 								    dst_ofs = avr_full_offset(a->vrt);
 								    lhs_ofs = avr_full_offset(a->vra);
 								    rhs_ofs = avr_full_offset(a->vrb);
 								    tcg_gen_gvec_cmp(cond, vece, dst_ofs, lhs_ofs, rhs_ofs, 16, 16);
 								    if (a->rc) {
 								        do_vcmp_rc(a->vrt);
 								    }
 								    return true;
 								}
 								/* Compare Equal: byte, halfword, word, doubleword. */
 								TRANS_FLAGS(ALTIVEC, VCMPEQUB, do_vcmp, TCG_COND_EQ, MO_8)
 								TRANS_FLAGS(ALTIVEC, VCMPEQUH, do_vcmp, TCG_COND_EQ, MO_16)
 								TRANS_FLAGS(ALTIVEC, VCMPEQUW, do_vcmp, TCG_COND_EQ, MO_32)
 								TRANS_FLAGS2(ALTIVEC_207, VCMPEQUD, do_vcmp, TCG_COND_EQ, MO_64)
 								/* Compare Greater Than, signed. */
 								TRANS_FLAGS(ALTIVEC, VCMPGTSB, do_vcmp, TCG_COND_GT, MO_8)
 								TRANS_FLAGS(ALTIVEC, VCMPGTSH, do_vcmp, TCG_COND_GT, MO_16)
 								TRANS_FLAGS(ALTIVEC, VCMPGTSW, do_vcmp, TCG_COND_GT, MO_32)
 								TRANS_FLAGS2(ALTIVEC_207, VCMPGTSD, do_vcmp, TCG_COND_GT, MO_64)
 								/* Compare Greater Than, unsigned. */
 								TRANS_FLAGS(ALTIVEC, VCMPGTUB, do_vcmp, TCG_COND_GTU, MO_8)
 								TRANS_FLAGS(ALTIVEC, VCMPGTUH, do_vcmp, TCG_COND_GTU, MO_16)
 								TRANS_FLAGS(ALTIVEC, VCMPGTUW, do_vcmp, TCG_COND_GTU, MO_32)
 								TRANS_FLAGS2(ALTIVEC_207, VCMPGTUD, do_vcmp, TCG_COND_GTU, MO_64)
 								/* Compare Not Equal (gated on the ISA300 flag). */
 								TRANS_FLAGS2(ISA300, VCMPNEB, do_vcmp, TCG_COND_NE, MO_8)
 								TRANS_FLAGS2(ISA300, VCMPNEH, do_vcmp, TCG_COND_NE, MO_16)
 								TRANS_FLAGS2(ISA300, VCMPNEW, do_vcmp, TCG_COND_NE, MO_32)
-												target/ppc: Move Vector Compare Not Equal or Zero to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-11-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Vector expansion of "compare not equal or zero": for each element of
 								 * size @vece, t = (a != b) | (a == 0) | (b == 0).
 								 */
 								static void gen_vcmpnez_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
 								{
 								    TCGv_vec a_is_zero = tcg_temp_new_vec_matching(t);
 								    TCGv_vec b_is_zero = tcg_temp_new_vec_matching(t);
 								    TCGv_vec zero_vec = tcg_constant_vec_matching(t, vece, 0);
 								    /* Both zero tests are emitted before t is written. */
 								    tcg_gen_cmp_vec(TCG_COND_EQ, vece, a_is_zero, a, zero_vec);
 								    tcg_gen_cmp_vec(TCG_COND_EQ, vece, b_is_zero, b, zero_vec);
 								    tcg_gen_cmp_vec(TCG_COND_NE, vece, t, a, b);
 								    tcg_gen_or_vec(vece, t, t, a_is_zero);
 								    tcg_gen_or_vec(vece, t, t, b_is_zero);
 								    tcg_temp_free_vec(a_is_zero);
 								    tcg_temp_free_vec(b_is_zero);
 								}
 								/*
 								 * VCMPNEZB / VCMPNEZH / VCMPNEZW: element-wise "not equal or zero"
 								 * compare of vra and vrb into vrt, gated on the ISA300 flag and on the
 								 * vector check.  @vece selects the table entry, so it must be MO_8,
 								 * MO_16 or MO_32.  Each entry offers the inline vector expansion
 								 * gen_vcmpnez_vec() together with an out-of-line helper.  When a->rc is
 								 * set, CR field 6 is updated through do_vcmp_rc().
 								 */
 								static bool do_vcmpnez(DisasContext *ctx, arg_VC *a, int vece)
 								{
 								    /* Vector opcodes that gen_vcmpnez_vec() relies on. */
 								    static const TCGOpcode needed_vecops[] = {
 								        INDEX_op_cmp_vec, 0
 								    };
 								    static const GVecGen3 expanders[] = {
 								        [MO_8] = {
 								            .vece = MO_8,
 								            .fniv = gen_vcmpnez_vec,
 								            .fno = gen_helper_VCMPNEZB,
 								            .opt_opc = needed_vecops,
 								        },
 								        [MO_16] = {
 								            .vece = MO_16,
 								            .fniv = gen_vcmpnez_vec,
 								            .fno = gen_helper_VCMPNEZH,
 								            .opt_opc = needed_vecops,
 								        },
 								        [MO_32] = {
 								            .vece = MO_32,
 								            .fniv = gen_vcmpnez_vec,
 								            .fno = gen_helper_VCMPNEZW,
 								            .opt_opc = needed_vecops,
 								        },
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
 								                   avr_full_offset(a->vrb), 16, 16, &expanders[vece]);
 								    if (a->rc) {
 								        do_vcmp_rc(a->vrt);
 								    }
 								    return true;
 								}
 								/* Byte, halfword and word instantiations. */
 								TRANS(VCMPNEZB, do_vcmpnez, MO_8)
 								TRANS(VCMPNEZH, do_vcmpnez, MO_16)
 								TRANS(VCMPNEZW, do_vcmpnez, MO_32)
-												target/ppc: Implement Vector Compare Equal Quadword

Implement the following PowerISA v3.1 instructions:
vcmpequq: Vector Compare Equal Quadword

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-12-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * VCMPEQUQ (PowerISA v3.1, Vector Compare Equal Quadword): set every bit
 								 * of vrt when the 128-bit values in vra and vrb are equal, clear vrt
 								 * otherwise.  With a->rc set, CR field 6 becomes 0x8 on equality and
 								 * 0x2 on inequality.
 								 *
 								 * The instruction is gated on the ISA310 flag and on the vector
 								 * facility being enabled, like the other v3.1 quadword compares in
 								 * do_vcmpq().
 								 */
 								static bool trans_VCMPEQUQ(DisasContext *ctx, arg_VC *a)
 								{
 								    TCGv_i64 t0, t1, t2;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    t0 = tcg_temp_new_i64();
 								    t1 = tcg_temp_new_i64();
 								    t2 = tcg_temp_new_i64();
 								    /* XOR each pair of doublewords; the OR is zero iff all 128 bits match. */
 								    get_avr64(t0, a->vra, true);
 								    get_avr64(t1, a->vrb, true);
 								    tcg_gen_xor_i64(t2, t0, t1);
 								    get_avr64(t0, a->vra, false);
 								    get_avr64(t1, a->vrb, false);
 								    tcg_gen_xor_i64(t1, t0, t1);
 								    tcg_gen_or_i64(t1, t1, t2);
 								    /* Turn the 0/1 equality flag into a 0/-1 mask for both halves. */
 								    tcg_gen_setcondi_i64(TCG_COND_EQ, t1, t1, 0);
 								    tcg_gen_neg_i64(t1, t1);
 								    set_avr64(a->vrt, t1, true);
 								    set_avr64(a->vrt, t1, false);
 								    if (a->rc) {
 								        /* mask -1 -> (0xa ^ 0x2) = 0x8, mask 0 -> 0x2 */
 								        tcg_gen_extrl_i64_i32(cpu_crf[6], t1);
 								        tcg_gen_andi_i32(cpu_crf[6], cpu_crf[6], 0xa);
 								        tcg_gen_xori_i32(cpu_crf[6], cpu_crf[6], 0x2);
 								    }
 								    tcg_temp_free_i64(t0);
 								    tcg_temp_free_i64(t1);
 								    tcg_temp_free_i64(t2);
 								    return true;
 								}
-												target/ppc: Implement Vector Compare Greater Than Quadword

Implement the following PowerISA v3.1 instructions:
vcmpgtsq: Vector Compare Greater Than Signed Quadword
vcmpgtuq: Vector Compare Greater Than Unsigned Quadword

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-13-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * VCMPGTSQ / VCMPGTUQ (PowerISA v3.1, Vector Compare Greater Than
 								 * Signed / Unsigned Quadword): set every bit of vrt when the 128-bit
 								 * value in vra is greater than the one in vrb, clear vrt otherwise.
 								 * @sign selects a signed comparison.  With a->rc set, CR field 6
 								 * becomes 0x8 when the comparison is true and 0x2 when it is false.
 								 *
 								 * The instructions are gated on the ISA310 flag and on the vector
 								 * facility being enabled, like the other v3.1 quadword compares in
 								 * do_vcmpq().
 								 */
 								static bool do_vcmpgtq(DisasContext *ctx, arg_VC *a, bool sign)
 								{
 								    TCGv_i64 t0, t1, t2;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    t0 = tcg_temp_new_i64();
 								    t1 = tcg_temp_new_i64();
 								    t2 = tcg_temp_new_i64();
 								    /* t2 = unsigned "greater than" on the doublewords read with false. */
 								    get_avr64(t0, a->vra, false);
 								    get_avr64(t1, a->vrb, false);
 								    tcg_gen_setcond_i64(TCG_COND_GTU, t2, t0, t1);
 								    get_avr64(t0, a->vra, true);
 								    get_avr64(t1, a->vrb, true);
 								    /* That result only counts when the other doublewords are equal. */
 								    tcg_gen_movcond_i64(TCG_COND_EQ, t2, t0, t1, t2, tcg_constant_i64(0));
 								    tcg_gen_setcond_i64(sign ? TCG_COND_GT : TCG_COND_GTU, t1, t0, t1);
 								    tcg_gen_or_i64(t1, t1, t2);
 								    /* Turn the 0/1 flag into a 0/-1 mask for both halves. */
 								    tcg_gen_neg_i64(t1, t1);
 								    set_avr64(a->vrt, t1, true);
 								    set_avr64(a->vrt, t1, false);
 								    if (a->rc) {
 								        /* mask -1 -> (0xa ^ 0x2) = 0x8, mask 0 -> 0x2 */
 								        tcg_gen_extrl_i64_i32(cpu_crf[6], t1);
 								        tcg_gen_andi_i32(cpu_crf[6], cpu_crf[6], 0xa);
 								        tcg_gen_xori_i32(cpu_crf[6], cpu_crf[6], 0x2);
 								    }
 								    tcg_temp_free_i64(t0);
 								    tcg_temp_free_i64(t1);
 								    tcg_temp_free_i64(t2);
 								    return true;
 								}
 								TRANS(VCMPGTSQ, do_vcmpgtq, true)
 								TRANS(VCMPGTUQ, do_vcmpgtq, false)
-												target/ppc: Implement Vector Compare Quadword

Implement the following PowerISA v3.1 instructions:
vcmpsq: Vector Compare Signed Quadword
vcmpuq: Vector Compare Unsigned Quadword

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-14-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * VCMPSQ / VCMPUQ: compare the 128-bit values in vra and vrb and store
 								 * CRF_LT, CRF_GT or CRF_EQ into CR field a->bf.
 								 *
 								 * The doublewords read with "true" are compared first, signed or
 								 * unsigned according to @sign.  Only when neither branch is taken are
 								 * the other doublewords compared, always unsigned.
 								 */
 								static bool do_vcmpq(DisasContext *ctx, arg_VX_bf *a, bool sign)
 								{
 								    const TCGCond first_gt = sign ? TCG_COND_GT : TCG_COND_GTU;
 								    const TCGCond first_lt = sign ? TCG_COND_LT : TCG_COND_LTU;
 								    TCGv_i64 lhs, rhs;
 								    TCGLabel *is_gt, *is_lt, *out;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    /*
 								     * Allocated as local temps, presumably because the values are read
 								     * again after a conditional branch -- TODO confirm.
 								     */
 								    lhs = tcg_temp_local_new_i64();
 								    rhs = tcg_temp_local_new_i64();
 								    is_gt = gen_new_label();
 								    is_lt = gen_new_label();
 								    out = gen_new_label();
 								    get_avr64(lhs, a->vra, true);
 								    get_avr64(rhs, a->vrb, true);
 								    tcg_gen_brcond_i64(first_gt, lhs, rhs, is_gt);
 								    tcg_gen_brcond_i64(first_lt, lhs, rhs, is_lt);
 								    get_avr64(lhs, a->vra, false);
 								    get_avr64(rhs, a->vrb, false);
 								    tcg_gen_brcond_i64(TCG_COND_GTU, lhs, rhs, is_gt);
 								    tcg_gen_brcond_i64(TCG_COND_LTU, lhs, rhs, is_lt);
 								    /* Fall-through: both doubleword pairs are equal. */
 								    tcg_gen_movi_i32(cpu_crf[a->bf], CRF_EQ);
 								    tcg_gen_br(out);
 								    gen_set_label(is_gt);
 								    tcg_gen_movi_i32(cpu_crf[a->bf], CRF_GT);
 								    tcg_gen_br(out);
 								    gen_set_label(is_lt);
 								    tcg_gen_movi_i32(cpu_crf[a->bf], CRF_LT);
 								    tcg_gen_br(out);
 								    gen_set_label(out);
 								    tcg_temp_free_i64(lhs);
 								    tcg_temp_free_i64(rhs);
 								    return true;
 								}
 								TRANS(VCMPSQ, do_vcmpq, true)
 								TRANS(VCMPUQ, do_vcmpq, false)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								/*
 								 * Floating-point vector compares.  Each GEN_VXRFORM line defines two
 								 * translators: gen_<name>() calling gen_helper_<name>, and gen_<name>_()
 								 * calling gen_helper_<name>_dot.
 								 */
 								GEN_VXRFORM(vcmpeqfp, 3, 3)
 								GEN_VXRFORM(vcmpgefp, 3, 7)
 								GEN_VXRFORM(vcmpgtfp, 3, 11)
 								GEN_VXRFORM(vcmpbfp, 3, 15)
-												target/ppc: Use tcg_gen_gvec_dup_imm

We can now unify the implementation of the 3 VSPLTI instructions.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

											
										
										
											2020-03-29 00:58:36 +03:00
+								/*
 								 * Common body of the VSPLTI* translators: raise POWERPC_EXCP_VPU when
 								 * Altivec is disabled, otherwise replicate the SIMM5 immediate taken from
 								 * the opcode into every element of size @vece of the 16-byte register rD,
 								 * using tcg_gen_gvec_dup_imm().
 								 */
 								static void gen_vsplti(DisasContext *ctx, int vece)
 								{
 								    int simm;
 								    if (unlikely(!ctx->altivec_enabled)) {
 								        gen_exception(ctx, POWERPC_EXCP_VPU);
 								        return;
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    }
-												target/ppc: Use tcg_gen_gvec_dup_imm

We can now unify the implementation of the 3 VSPLTI instructions.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

											
										
										
											2020-03-29 00:58:36 +03:00
+								    simm = SIMM5(ctx->opcode);
 								    tcg_gen_gvec_dup_imm(vece, avr_full_offset(rD(ctx->opcode)), 16, 16, simm);
 								}
 								/*
 								 * GEN_VXFORM_VSPLTI - define gen_<name>() as a thin wrapper that calls
 								 * gen_vsplti() with a fixed element size.  opc2 and opc3 are not used by
 								 * the expansion.  Instantiated below for byte, halfword and word.
 								 */
 								#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \
 								static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); }
 								GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12);
 								GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13);
 								GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								/*
 								 * GEN_VXFORM_NOA - define gen_<name>() for a vector instruction that has
 								 * no rA operand.  The generated function raises POWERPC_EXCP_VPU when
 								 * Altivec is disabled; otherwise it calls gen_helper_<name>(rd, rb) with
 								 * pointers to the rD and rB vector registers and frees both pointer
 								 * temporaries.  opc2 and opc3 are not used by the expansion.
 								 */
 								#define GEN_VXFORM_NOA(name, opc2, opc3)                                \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								static void glue(gen_, name)(DisasContext *ctx)                         \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    {                                                                   \
 								        TCGv_ptr rb, rd;                                                \
 								        if (unlikely(!ctx->altivec_enabled)) {                          \
 								            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
 								            return;                                                     \
 								        }                                                               \
 								        rb = gen_avr_ptr(rB(ctx->opcode));                              \
 								        rd = gen_avr_ptr(rD(ctx->opcode));                              \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								        gen_helper_##name(rd, rb);                                      \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								        tcg_temp_free_ptr(rb);                                          \
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								        tcg_temp_free_ptr(rd);                                          \
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    }
 								/*
 								 * GEN_VXFORM_NOA_ENV - define gen_<name>() for a vector instruction
 								 * without an rA operand whose helper also takes cpu_env.  The generated
 								 * function raises POWERPC_EXCP_VPU when Altivec is disabled; otherwise it
 								 * calls gen_helper_<name>(cpu_env, rD, rB) on vector register pointers
 								 * and frees the pointer temporaries.  opc2 and opc3 are not used by the
 								 * expansion.
 								 */
 								#define GEN_VXFORM_NOA_ENV(name, opc2, opc3)                            \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    TCGv_ptr src, dst;                                                  \
 								                                                                        \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    src = gen_avr_ptr(rB(ctx->opcode));                                 \
 								    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
 								    gen_helper_##name(cpu_env, dst, src);                               \
 								    tcg_temp_free_ptr(src);                                             \
 								    tcg_temp_free_ptr(dst);                                             \
 								}
-												target-ppc: add vector count trailing zeros instructions

The following vector count trailing zeros instructions are
added from ISA 3.0.

vctzb - Vector Count Trailing Zeros Byte
vctzh - Vector Count Trailing Zeros Halfword
vctzw - Vector Count Trailing Zeros Word
vctzd - Vector Count Trailing Zeros Doubleword

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-06 08:04:08 +03:00
+								/*
 								 * GEN_VXFORM_NOA_2 - same expansion as GEN_VXFORM_NOA, for instructions
 								 * that carry an additional opc4 opcode field.  The generated gen_<name>()
 								 * raises POWERPC_EXCP_VPU when Altivec is disabled; otherwise it calls
 								 * gen_helper_<name>(rD, rB) on vector register pointers and frees the
 								 * pointer temporaries.  opc2, opc3 and opc4 are not used by the
 								 * expansion.
 								 */
 								#define GEN_VXFORM_NOA_2(name, opc2, opc3, opc4)                        \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    TCGv_ptr src, dst;                                                  \
 								                                                                        \
 								    if (unlikely(!ctx->altivec_enabled)) {                              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
 								        return;                                                         \
 								    }                                                                   \
 								    src = gen_avr_ptr(rB(ctx->opcode));                                 \
 								    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
 								    gen_helper_##name(dst, src);                                        \
 								    tcg_temp_free_ptr(src);                                             \
 								    tcg_temp_free_ptr(dst);                                             \
 								}
-												target-ppc: add vclzlsbb/vctzlsbb instructions

The following vector instructions are added from ISA 3.0.

vclzlsbb - Vector Count Leading Zero Least-Significant Bits Byte
vctzlsbb - Vector Count Trailing Zero Least-Significant Bits Byte

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-28 08:45:18 +03:00
/*
 * GEN_VXFORM_NOA_3 - define gen_<name>() for a vector op whose result is
 * written to a general-purpose register.
 *
 * The generated translator raises POWERPC_EXCP_VPU when
 * ctx->altivec_enabled is clear.  Otherwise it calls
 * gen_helper_<name>(cpu_gpr[rD], rb), where rb points at the rB vector
 * register.  opc2, opc3 and opc4 are not referenced by the expansion.
 */
#define GEN_VXFORM_NOA_3(name, opc2, opc3, opc4)                        \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr rb;                                                        \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
    gen_helper_##name(cpu_gpr[rD(ctx->opcode)], rb);                    \
    tcg_temp_free_ptr(rb);                                              \
}
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
/*
 * Translators generated via GEN_VXFORM_NOA and GEN_VXFORM_NOA_ENV (both
 * macros are defined earlier in this file).  The two numeric arguments are
 * the opc2/opc3 values for each instruction.
 */
GEN_VXFORM_NOA(vupkhsb, 7, 8);
GEN_VXFORM_NOA(vupkhsh, 7, 9);
GEN_VXFORM_NOA(vupkhsw, 7, 25);
GEN_VXFORM_NOA(vupklsb, 7, 10);
GEN_VXFORM_NOA(vupklsh, 7, 11);
GEN_VXFORM_NOA(vupklsw, 7, 27);
GEN_VXFORM_NOA(vupkhpx, 7, 13);
GEN_VXFORM_NOA(vupklpx, 7, 15);
GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
GEN_VXFORM_NOA_ENV(vlogefp, 5, 7);
GEN_VXFORM_NOA_ENV(vrfim, 5, 11);
GEN_VXFORM_NOA_ENV(vrfin, 5, 8);
GEN_VXFORM_NOA_ENV(vrfip, 5, 10);
GEN_VXFORM_NOA_ENV(vrfiz, 5, 9);
-												target-ppc: add vprtyb[w/d/q] instructions

Add following POWER ISA 3.0 instructions.
vprtybw: Vector Parity Byte Word
vprtybd: Vector Parity Byte Double Word
vprtybq: Vector Parity Byte Quad Word

Signed-off-by: Ankit Kumar <ankit@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-30 06:14:58 +03:00
/*
 * ISA 3.0 Vector Parity Byte {Word, Doubleword, Quadword}.  All three are
 * instantiated with the same opc2/opc3 pair (1, 24).
 */
GEN_VXFORM_NOA(vprtybw, 1, 24);
GEN_VXFORM_NOA(vprtybd, 1, 24);
GEN_VXFORM_NOA(vprtybq, 1, 24);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
-												target/ppc: convert vsplt[bhw] to use vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-5-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:45 +03:00
+								static void gen_vsplt(DisasContext *ctx, int vece)
 								{
 								    int uimm, dofs, bofs;
 								    if (unlikely(!ctx->altivec_enabled)) {
 								        gen_exception(ctx, POWERPC_EXCP_VPU);
 								        return;
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    }
-												target/ppc: convert vsplt[bhw] to use vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-5-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:45 +03:00
+								    uimm = UIMM5(ctx->opcode);
-												target/ppc: introduce avr_full_offset() function

All TCG vector operations require pointers to the base address of the vector
rather than separate access to the top and bottom 64-bits. Convert the VMX TCG
instructions to use a new avr_full_offset() function instead of avr64_offset()
which can then itself be written as a simple wrapper onto vsr_full_offset().

This same function can also reused in cpu_avr_ptr() to avoid having more than
one copy of the offset calculation logic.

Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Message-Id: <20190307180520.13868-5-mark.cave-ayland@ilande.co.uk>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-03-07 21:05:17 +03:00
+								    bofs = avr_full_offset(rB(ctx->opcode));
 								    dofs = avr_full_offset(rD(ctx->opcode));
-												target/ppc: convert vsplt[bhw] to use vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-5-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:45 +03:00
 								    /* Experimental testing shows that hardware masks the immediate.  */
 								    bofs += (uimm << vece) & 15;
-												Replace config-time define HOST_WORDS_BIGENDIAN

Replace a config-time define with a compile time condition
define (compatible with clang and gcc) that must be declared prior to
its usage. This avoids having a global configure time define, but also
prevents from bad usage, if the config header wasn't included before.

This can help to make some code independent from qemu too.

gcc supports __BYTE_ORDER__ from about 4.6 and clang from 3.2.

Signed-off-by: Marc-André Lureau <marcandre.lureau@redhat.com>
[ For the s390x parts I'm involved in ]
Acked-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220323155743.1585078-7-marcandre.lureau@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

											
										
										
											2022-03-23 18:57:17 +03:00
+								#if !HOST_BIG_ENDIAN
-												target/ppc: convert vsplt[bhw] to use vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-5-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:45 +03:00
+								    bofs ^= 15;
 								    bofs &= ~((1 << vece) - 1);
 								#endif
 								    tcg_gen_gvec_dup_mem(vece, dofs, bofs, 16, 16);
 								}
 								/*
 								 * GEN_VXFORM_VSPLT - define gen_<name>() as a call to gen_vsplt() with a
 								 * fixed element size.  opc2 and opc3 are not referenced by the expansion.
 								 */
 								#define GEN_VXFORM_VSPLT(name, vece, opc2, opc3)                        \
 								static void glue(gen_, name)(DisasContext *ctx)                         \
 								{                                                                       \
 								    gen_vsplt(ctx, vece);                                               \
 								}
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
/*
 * GEN_VXFORM_UIMM_ENV - define gen_<name>() for a vector op that takes a
 * 5-bit unsigned immediate and needs the CPU environment.
 *
 * The generated translator raises POWERPC_EXCP_VPU when
 * ctx->altivec_enabled is clear.  Otherwise it calls
 * gen_helper_<name>(cpu_env, rd, rb, uimm), with uimm taken from
 * UIMM5(opcode).  opc2 and opc3 are not referenced by the expansion.
 */
#define GEN_VXFORM_UIMM_ENV(name, opc2, opc3)                           \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr rb, rd;                                                    \
    TCGv_i32 uimm;                                                      \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    uimm = tcg_const_i32(UIMM5(ctx->opcode));                           \
    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
    rd = gen_avr_ptr(rD(ctx->opcode));                                  \
    gen_helper_##name(cpu_env, rd, rb, uimm);                           \
    tcg_temp_free_i32(uimm);                                            \
    tcg_temp_free_ptr(rb);                                              \
    tcg_temp_free_ptr(rd);                                              \
}
-												target-ppc: add vector insert instructions

The following vector insert instructions are added from ISA 3.0.

vinsertb - Vector Insert Byte
vinserth - Vector Insert Halfword
vinsertw - Vector Insert Word
vinsertd - Vector Insert Doubleword

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-06 08:04:06 +03:00
/*
 * GEN_VXFORM_UIMM_SPLAT - define gen_<name>() for a vector op that takes a
 * 4-bit unsigned immediate with an upper bound.
 *
 * The generated translator raises POWERPC_EXCP_VPU when
 * ctx->altivec_enabled is clear.  An immediate greater than splat_max is
 * replaced by 0 before being passed on.  The helper is called as
 * gen_helper_<name>(rd, rb, t0), where t0 holds the (possibly clamped)
 * immediate.  opc2 and opc3 are not referenced by the expansion.
 */
#define GEN_VXFORM_UIMM_SPLAT(name, opc2, opc3, splat_max)              \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr rb, rd;                                                    \
    uint8_t uimm = UIMM4(ctx->opcode);                                  \
    TCGv_i32 t0;                                                        \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    if (uimm > splat_max) {                                             \
        uimm = 0;                                                       \
    }                                                                   \
    t0 = tcg_temp_new_i32();                                            \
    tcg_gen_movi_i32(t0, uimm);                                         \
    rb = gen_avr_ptr(rB(ctx->opcode));                                  \
    rd = gen_avr_ptr(rD(ctx->opcode));                                  \
    gen_helper_##name(rd, rb, t0);                                      \
    tcg_temp_free_i32(t0);                                              \
    tcg_temp_free_ptr(rb);                                              \
    tcg_temp_free_ptr(rd);                                              \
}
-												target/ppc: convert vsplt[bhw] to use vector operations

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Acked-by: David Gibson <david@gibson.dropbear.id.au>
Message-Id: <20190215100058.20015-5-mark.cave-ayland@ilande.co.uk>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-02-15 13:00:45 +03:00
/* vsplt{b,h,w}: splat translators for byte, halfword and word elements. */
GEN_VXFORM_VSPLT(vspltb, MO_8, 6, 8);
GEN_VXFORM_VSPLT(vsplth, MO_16, 6, 9);
GEN_VXFORM_VSPLT(vspltw, MO_32, 6, 10);
-												target-ppc: add vector extract instructions

The following vector extract instructions are added from ISA 3.0.

vextractub - Vector Extract Unsigned Byte
vextractuh - Vector Extract Unsigned Halfword
vextractuw - Vector Extract Unsigned Word
vextractd - Vector Extract Unsigned Doubleword

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-06 08:04:07 +03:00
/*
 * ISA 3.0 vector extract translators.  The last argument is the largest
 * accepted immediate; in each case it equals 16 minus the element size in
 * bytes (1, 2, 4, 8).
 */
GEN_VXFORM_UIMM_SPLAT(vextractub, 6, 8, 15);
GEN_VXFORM_UIMM_SPLAT(vextractuh, 6, 9, 14);
GEN_VXFORM_UIMM_SPLAT(vextractuw, 6, 10, 12);
GEN_VXFORM_UIMM_SPLAT(vextractd, 6, 11, 8);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
/* Translators that pass cpu_env and a 5-bit immediate to their helper. */
GEN_VXFORM_UIMM_ENV(vcfux, 5, 12);
GEN_VXFORM_UIMM_ENV(vcfsx, 5, 13);
GEN_VXFORM_UIMM_ENV(vctuxs, 5, 14);
GEN_VXFORM_UIMM_ENV(vctsxs, 5, 15);
-												target-ppc: fix vmx instruction type/type2

A few of the newly added instructions inadvertently changed the type of
the older instructions (PPC_ALTIVEC) to PPC2_ALTIVEC_207 in the dual-form
declarations.

commit: b5d569a1 (target-ppc: add vector extract instructions)
commit: e7b1e06f (target-ppc: add vector insert instructions)
commit: 3aa56a19 (target-ppc: add vector compare not equal instructions)

New ISA 3.0 instructions added:
    vextractub     PPC_NONE     PPC2_ISA300
    vextractuh     PPC_NONE     PPC2_ISA300
    vextractuw     PPC_NONE     PPC2_ISA300
    vinsertb       PPC_NONE     PPC2_ISA300
    vinserth       PPC_NONE     PPC2_ISA300
    vinsertw       PPC_NONE     PPC2_ISA300
    vcmpneb        PPC_NONE     PPC2_ISA300
    vcmpneh        PPC_NONE     PPC2_ISA300
    vcmpnew        PPC_NONE     PPC2_ISA300

Affected older instructions:
    vspltb         PPC_ALTIVEC  PPC_NONE
    vsplth         PPC_ALTIVEC  PPC_NONE
    vspltw         PPC_ALTIVEC  PPC_NONE
    vspltisb       PPC_ALTIVEC  PPC_NONE
    vspltish       PPC_ALTIVEC  PPC_NONE
    vspltisw       PPC_ALTIVEC  PPC_NONE
    vcmpequb       PPC_ALTIVEC  PPC_NONE
    vcmpequh       PPC_ALTIVEC  PPC_NONE
    vcmpequw       PPC_ALTIVEC  PPC_NONE

Change the instruction type/type2 for the older instructions back to
what it was(PPC_ALTIVEC).

CC: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Reported-by: Bharata B Rao <bharata@linux.vnet.ibm.com>
Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-29 13:22:37 +03:00
/*
 * Pair each AltiVec vsplt* translator (PPC_ALTIVEC) with the ISA 3.0
 * vextractu* translator (PPC2_ISA300) instantiated with the same opc2/opc3
 * values.  NOTE(review): how GEN_VXFORM_DUAL selects between the two at
 * translation time is defined elsewhere - confirm before relying on it.
 */
GEN_VXFORM_DUAL(vspltb, PPC_ALTIVEC, PPC_NONE,
                vextractub, PPC_NONE, PPC2_ISA300);
GEN_VXFORM_DUAL(vsplth, PPC_ALTIVEC, PPC_NONE,
                vextractuh, PPC_NONE, PPC2_ISA300);
GEN_VXFORM_DUAL(vspltw, PPC_ALTIVEC, PPC_NONE,
                vextractuw, PPC_NONE, PPC2_ISA300);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
-												target/ppc: implement vgnb

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-19-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								static bool trans_VGNB(DisasContext *ctx, arg_VX_n *a)
 								{
 								    /*
 								     * Similar to do_vextractm, we'll use a sequence of mask-shift-or operations
 								     * to gather the bits. The masks can be created with
 								     *
 								     * uint64_t mask(uint64_t n, uint64_t step)
 								     * {
 								     *     uint64_t p = ((1UL << (1UL << step)) - 1UL) << ((n - 1UL) << step),
 								     *                  plen = n << step, m = 0;
 								     *     for(int i = 0; i < 64/plen; i++) {
 								     *         m |= p;
 								     *         m = ror64(m, plen);
 								     *     }
 								     *     p >>= plen * DIV_ROUND_UP(64, plen) - 64;
 								     *     return m | p;
 								     * }
 								     *
 								     * But since there are few values of N, we'll use a lookup table to avoid
 								     * these calculations at runtime.
 								     */
 								    static const uint64_t mask[6][5] = {
 								        {
 xAAAAAAAAAAAAAAAAULL, 0xccccccccccccccccULL, 0xf0f0f0f0f0f0f0f0ULL,
 xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL
 								        },
 								        {
 x9249249249249249ULL, 0xC30C30C30C30C30CULL, 0xF00F00F00F00F00FULL,
 xFF0000FF0000FF00ULL, 0xFFFF00000000FFFFULL
 								        },
 								        {
 								            /* For N >= 4, some mask operations can be elided */
 x8888888888888888ULL, 0, 0xf000f000f000f000ULL, 0,
 xFFFF000000000000ULL
 								        },
 								        {
 x8421084210842108ULL, 0, 0xF0000F0000F0000FULL, 0, 0
 								        },
 								        {
 x8208208208208208ULL, 0, 0xF00000F00000F000ULL, 0, 0
 								        },
 								        {
 x8102040810204081ULL, 0, 0xF000000F000000F0ULL, 0, 0
 								        }
 								    };
 								    uint64_t m;
 								    int i, sh, nbits = DIV_ROUND_UP(64, a->n);
 								    TCGv_i64 hi, lo, t0, t1;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    if (a->n < 2) {
 								        /*
 								         * "N can be any value between 2 and 7, inclusive." Otherwise, the
 								         * result is undefined, so we don't need to change RT. Also, N > 7 is
 								         * impossible since the immediate field is 3 bits only.
 								         */
 								        return true;
 								    }
 								    hi = tcg_temp_new_i64();
 								    lo = tcg_temp_new_i64();
 								    t0 = tcg_temp_new_i64();
 								    t1 = tcg_temp_new_i64();
 								    get_avr64(hi, a->vrb, true);
 								    get_avr64(lo, a->vrb, false);
 								    /* Align the lower doubleword so we can use the same mask */
 								    tcg_gen_shli_i64(lo, lo, a->n * nbits - 64);
 								    /*
 								     * Starting from the most significant bit, gather every Nth bit with a
 								     * sequence of mask-shift-or operation. E.g.: for N=3
 								     * AxxBxxCxxDxxExxFxxGxxHxxIxxJxxKxxLxxMxxNxxOxxPxxQxxRxxSxxTxxUxxV
 								     *     & rep(0b100)
 								     * A..B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V
 								     *     << 2
 								     * .B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V..
 								     *     |
 								     * AB.BC.CD.DE.EF.FG.GH.HI.IJ.JK.KL.LM.MN.NO.OP.PQ.QR.RS.ST.TU.UV.V
 								     *  & rep(0b110000)
 								     * AB....CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV..
 								     *     << 4
 								     * ..CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV......
 								     *     |
 								     * ABCD..CDEF..EFGH..GHIJ..IJKL..KLMN..MNOP..OPQR..QRST..STUV..UV..
 								     *     & rep(0b111100000000)
 								     * ABCD........EFGH........IJKL........MNOP........QRST........UV..
 								     *     << 8
 								     * ....EFGH........IJKL........MNOP........QRST........UV..........
 								     *     |
 								     * ABCDEFGH....EFGHIJKL....IJKLMNOP....MNOPQRST....QRSTUV......UV..
 								     *  & rep(0b111111110000000000000000)
 								     * ABCDEFGH................IJKLMNOP................QRSTUV..........
 								     *     << 16
 								     * ........IJKLMNOP................QRSTUV..........................
 								     *     |
 								     * ABCDEFGHIJKLMNOP........IJKLMNOPQRSTUV..........QRSTUV..........
 								     *     & rep(0b111111111111111100000000000000000000000000000000)
 								     * ABCDEFGHIJKLMNOP................................QRSTUV..........
 								     *     << 32
 								     * ................QRSTUV..........................................
 								     *     |
 								     * ABCDEFGHIJKLMNOPQRSTUV..........................QRSTUV..........
 								     */
 								    for (i = 0, sh = a->n - 1; i < 5; i++, sh <<= 1) {
 								        m = mask[a->n - 2][i];
 								        if (m) {
 								            tcg_gen_andi_i64(hi, hi, m);
 								            tcg_gen_andi_i64(lo, lo, m);
 								        }
 								        if (sh < 64) {
 								            tcg_gen_shli_i64(t0, hi, sh);
 								            tcg_gen_shli_i64(t1, lo, sh);
 								            tcg_gen_or_i64(hi, t0, hi);
 								            tcg_gen_or_i64(lo, t1, lo);
 								        }
 								    }
 								    tcg_gen_andi_i64(hi, hi, ~(~0ULL >> nbits));
 								    tcg_gen_andi_i64(lo, lo, ~(~0ULL >> nbits));
 								    tcg_gen_shri_i64(lo, lo, nbits);
 								    tcg_gen_or_i64(hi, hi, lo);
 								    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], hi);
 								    tcg_temp_free_i64(hi);
 								    tcg_temp_free_i64(lo);
 								    tcg_temp_free_i64(t0);
 								    tcg_temp_free_i64(t1);
 								    return true;
 								}
-												target/ppc: Implement Vector Extract Double to VSR using GPR index insns

Implement the following PowerISA v3.1 instructions:
vextdubvlx: Vector Extract Double Unsigned Byte to VSR using
            GPR-specified Left-Index
vextduhvlx: Vector Extract Double Unsigned Halfword to VSR using
            GPR-specified Left-Index
vextduwvlx: Vector Extract Double Unsigned Word to VSR using
            GPR-specified Left-Index
vextddvlx: Vector Extract Double Doubleword to VSR using
           GPR-specified Left-Index
vextdubvrx: Vector Extract Double Unsigned Byte to VSR using
            GPR-specified Right-Index
vextduhvrx: Vector Extract Double Unsigned Halfword to VSR using
            GPR-specified Right-Index
vextduwvrx: Vector Extract Double Unsigned Word to VSR using
            GPR-specified Right-Index
vextddvrx: Vector Extract Double Doubleword to VSR using
           GPR-specified Right-Index

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-10-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:03 +03:00
+								static bool do_vextdx(DisasContext *ctx, arg_VA *a, int size, bool right,
 								               void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv))
 								{
 								    TCGv_ptr vrt, vra, vrb;
 								    TCGv rc;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    vrt = gen_avr_ptr(a->vrt);
 								    vra = gen_avr_ptr(a->vra);
 								    vrb = gen_avr_ptr(a->vrb);
 								    rc = tcg_temp_new();
 								    tcg_gen_andi_tl(rc, cpu_gpr[a->rc], 0x1F);
 								    if (right) {
 								        tcg_gen_subfi_tl(rc, 32 - size, rc);
 								    }
 								    gen_helper(cpu_env, vrt, vra, vrb, rc);
 								    tcg_temp_free_ptr(vrt);
 								    tcg_temp_free_ptr(vra);
 								    tcg_temp_free_ptr(vrb);
 								    tcg_temp_free(rc);
 								    return true;
 								}
 								/*
 								 * Left-index (..VLX) and right-index (..VRX) extract translators.  The
 								 * right-index forms pass right=true and reuse the left-index helpers;
 								 * do_vextdx converts the index before calling the helper.
 								 */
 								TRANS(VEXTDUBVLX, do_vextdx, 1, false, gen_helper_VEXTDUBVLX)
 								TRANS(VEXTDUHVLX, do_vextdx, 2, false, gen_helper_VEXTDUHVLX)
 								TRANS(VEXTDUWVLX, do_vextdx, 4, false, gen_helper_VEXTDUWVLX)
 								TRANS(VEXTDDVLX, do_vextdx, 8, false, gen_helper_VEXTDDVLX)
 								TRANS(VEXTDUBVRX, do_vextdx, 1, true, gen_helper_VEXTDUBVLX)
 								TRANS(VEXTDUHVRX, do_vextdx, 2, true, gen_helper_VEXTDUHVLX)
 								TRANS(VEXTDUWVRX, do_vextdx, 4, true, gen_helper_VEXTDUWVLX)
 								TRANS(VEXTDDVRX, do_vextdx, 8, true, gen_helper_VEXTDDVLX)
-												target/ppc: Implement Vector Insert from GPR using GPR index insns

Implements the following PowerISA v3.1 instructions:
vinsblx: Vector Insert Byte from GPR using GPR-specified Left-Index
vinshlx: Vector Insert Halfword from GPR using GPR-specified Left-Index
vinswlx: Vector Insert Word from GPR using GPR-specified Left-Index
vinsdlx: Vector Insert Doubleword from GPR using GPR-specified
         Left-Index
vinsbrx: Vector Insert Byte from GPR using GPR-specified Right-Index
vinshrx: Vector Insert Halfword from GPR using GPR-specified
         Right-Index
vinswrx: Vector Insert Word from GPR using GPR-specified Right-Index
vinsdrx: Vector Insert Doubleword from GPR using GPR-specified
         Right-Index

The helpers and do_vinsx receive i64 to allow code sharing with the
future implementation of Vector Insert from VSR using GPR Index.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-6-matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:36:59 +03:00
+								static bool do_vinsx(DisasContext *ctx, int vrt, int size, bool right, TCGv ra,
 								            TCGv_i64 rb, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
 								{
 								    TCGv_ptr t;
 								    TCGv idx;
 								    t = gen_avr_ptr(vrt);
 								    idx = tcg_temp_new();
 								    tcg_gen_andi_tl(idx, ra, 0xF);
 								    if (right) {
 								        tcg_gen_subfi_tl(idx, 16 - size, idx);
 								    }
 								    gen_helper(cpu_env, t, rb, idx);
 								    tcg_temp_free_ptr(t);
 								    tcg_temp_free(idx);
 								    return true;
 								}
-												target/ppc: Implement Vector Insert from VSR using GPR index insns

Implements the following PowerISA v3.1 instructions:
vinsbvlx: Vector Insert Byte from VSR using GPR-specified Left-Index
vinshvlx: Vector Insert Halfword from VSR using GPR-specified
          Left-Index
vinswvlx: Vector Insert Word from VSR using GPR-specified Left-Index
vinsbvrx: Vector Insert Byte from VSR using GPR-specified Right-Index
vinshvrx: Vector Insert Halfword from VSR using GPR-specified
          Right-Index
vinswvrx: Vector Insert Word from VSR using GPR-specified Right-Index

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-8-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:01 +03:00
+								static bool do_vinsvx(DisasContext *ctx, int vrt, int size, bool right, TCGv ra,
 								                int vrb, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
 								{
 								    bool ok;
 								    TCGv_i64 val;
 								    val = tcg_temp_new_i64();
 								    get_avr64(val, vrb, true);
 								    ok = do_vinsx(ctx, vrt, size, right, ra, val, gen_helper);
 								    tcg_temp_free_i64(val);
 								    return ok;
 								}
-												target/ppc: Implement Vector Insert from GPR using GPR index insns

Implements the following PowerISA v3.1 instructions:
vinsblx: Vector Insert Byte from GPR using GPR-specified Left-Index
vinshlx: Vector Insert Halfword from GPR using GPR-specified Left-Index
vinswlx: Vector Insert Word from GPR using GPR-specified Left-Index
vinsdlx: Vector Insert Doubleword from GPR using GPR-specified
         Left-Index
vinsbrx: Vector Insert Byte from GPR using GPR-specified Right-Index
vinshrx: Vector Insert Halfword from GPR using GPR-specified
         Right-Index
vinswrx: Vector Insert Word from GPR using GPR-specified Right-Index
vinsdrx: Vector Insert Doubleword from GPR using GPR-specified
         Right-Index

The helpers and do_vinsx receive i64 to allow code sharing with the
future implementation of Vector Insert from VSR using GPR Index.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-6-matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:36:59 +03:00
+								static bool do_vinsx_VX(DisasContext *ctx, arg_VX *a, int size, bool right,
 								                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
 								{
 								    bool ok;
 								    TCGv_i64 val;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    val = tcg_temp_new_i64();
 								    tcg_gen_extu_tl_i64(val, cpu_gpr[a->vrb]);
 								    ok = do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], val, gen_helper);
 								    tcg_temp_free_i64(val);
 								    return ok;
 								}
-												target/ppc: Implement Vector Insert from VSR using GPR index insns

Implements the following PowerISA v3.1 instructions:
vinsbvlx: Vector Insert Byte from VSR using GPR-specified Left-Index
vinshvlx: Vector Insert Halfword from VSR using GPR-specified
          Left-Index
vinswvlx: Vector Insert Word from VSR using GPR-specified Left-Index
vinsbvrx: Vector Insert Byte from VSR using GPR-specified Right-Index
vinshvrx: Vector Insert Halfword from VSR using GPR-specified
          Right-Index
vinswvrx: Vector Insert Word from VSR using GPR-specified Right-Index

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-8-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:01 +03:00
+								/*
 								 * VX-form front end for the insert-from-VSR instructions that take their
 								 * index from a GPR.  After the ISA 3.1 and vector-enabled checks it simply
 								 * forwards to do_vinsvx() with GPR[vra] as the index operand and the
 								 * register number a->vrb as the source.
 								 */
 								static bool do_vinsvx_VX(DisasContext *ctx, arg_VX *a, int size, bool right,
 								                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
 								{
 								    bool handled;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    handled = do_vinsvx(ctx, a->vrt, size, right, cpu_gpr[a->vra], a->vrb,
 								                        gen_helper);
 								    return handled;
 								}
-												target/ppc: Implement Vector Insert Word from GPR using Immediate insns

Implements the following PowerISA v3.1 instructions:
vinsw: Vector Insert Word from GPR using immediate-specified index
vinsd: Vector Insert Doubleword from GPR using immediate-specified
       index

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-7-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:00 +03:00
+								/*
 								 * Front end for the insert-from-GPR instructions that take an immediate
 								 * index (UIM).  An index larger than 16 - size is logged as a guest error
 								 * and the instruction is treated as handled without touching VRT;
 								 * otherwise GPR[vrb] is zero-extended and inserted through do_vinsx()
 								 * with a constant index and right == false.
 								 */
 								static bool do_vins_VX_uim4(DisasContext *ctx, arg_VX_uim4 *a, int size,
 								                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
 								{
 								    bool ok;
 								    TCGv_i64 val;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    if (a->uim > (16 - size)) {
 								        /*
 								         * PowerISA v3.1 says that the resulting value is undefined in this
 								         * case, so just log a guest error and leave VRT unchanged. The
 								         * real hardware would do a partial insert, e.g. if VRT is zeroed and
 								         * RB is 0x12345678, executing "vinsw VRT,RB,14" results in
 								         * VRT = 0x0000...00001234, but we don't bother to reproduce this
 								         * behavior as software shouldn't rely on it.
 								         */
 								        /* Report the same limit that the check above compares against. */
 								        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINS* at"
 								            " 0x" TARGET_FMT_lx ", UIM = %d > %d\n", ctx->cia, a->uim,
 								            16 - size);
 								        return true;
 								    }
 								    val = tcg_temp_new_i64();
 								    tcg_gen_extu_tl_i64(val, cpu_gpr[a->vrb]);
 								    ok = do_vinsx(ctx, a->vrt, size, false, tcg_constant_tl(a->uim), val,
 								                  gen_helper);
 								    tcg_temp_free_i64(val);
 								    return ok;
 								}
-												target/ppc: Move vinsertb/vinserth/vinsertw/vinsertd to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-9-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:02 +03:00
+								/*
 								 * Front end for the ISA 3.0 vinsert{b,h,w,d} instructions.  An immediate
 								 * index larger than 16 - size is logged as a guest error and the
 								 * instruction is treated as handled without generating any code;
 								 * otherwise the insert is delegated to do_vinsvx() with a constant index
 								 * and right == false.
 								 */
 								static bool do_vinsert_VX_uim4(DisasContext *ctx, arg_VX_uim4 *a, int size,
 								                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
 								{
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
 								    REQUIRE_VECTOR(ctx);
 								    if (a->uim > (16 - size)) {
 								        /* Report the same limit that the check above compares against. */
 								        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINSERT* at"
 								            " 0x" TARGET_FMT_lx ", UIM = %d > %d\n", ctx->cia, a->uim,
 								            16 - size);
 								        return true;
 								    }
 								    return do_vinsvx(ctx, a->vrt, size, false, tcg_constant_tl(a->uim), a->vrb,
 								                     gen_helper);
 								}
-												target/ppc: Implement Vector Insert from GPR using GPR index insns

Implements the following PowerISA v3.1 instructions:
vinsblx: Vector Insert Byte from GPR using GPR-specified Left-Index
vinshlx: Vector Insert Halfword from GPR using GPR-specified Left-Index
vinswlx: Vector Insert Word from GPR using GPR-specified Left-Index
vinsdlx: Vector Insert Doubleword from GPR using GPR-specified
         Left-Index
vinsbrx: Vector Insert Byte from GPR using GPR-specified Right-Index
vinshrx: Vector Insert Halfword from GPR using GPR-specified
         Right-Index
vinswrx: Vector Insert Word from GPR using GPR-specified Right-Index
vinsdrx: Vector Insert Doubleword from GPR using GPR-specified
         Right-Index

The helpers and do_vinsx receive i64 to allow code sharing with the
future implementation of Vector Insert from VSR using GPR Index.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-6-matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:36:59 +03:00
+								/*
 								 * Insert from GPR, GPR-indexed.  Arguments after do_vinsx_VX are the
 								 * element size in bytes, the "right" flag and the helper.  The
 								 * right-index (..RX) forms reuse the left-index helpers and differ only
 								 * in passing right == true.
 								 */
 								TRANS(VINSBLX, do_vinsx_VX, 1, false, gen_helper_VINSBLX)
 								TRANS(VINSHLX, do_vinsx_VX, 2, false, gen_helper_VINSHLX)
 								TRANS(VINSWLX, do_vinsx_VX, 4, false, gen_helper_VINSWLX)
 								TRANS(VINSDLX, do_vinsx_VX, 8, false, gen_helper_VINSDLX)
 								TRANS(VINSBRX, do_vinsx_VX, 1, true, gen_helper_VINSBLX)
 								TRANS(VINSHRX, do_vinsx_VX, 2, true, gen_helper_VINSHLX)
 								TRANS(VINSWRX, do_vinsx_VX, 4, true, gen_helper_VINSWLX)
 								TRANS(VINSDRX, do_vinsx_VX, 8, true, gen_helper_VINSDLX)
-												target/ppc: Implement Vector Insert Word from GPR using Immediate insns

Implements the following PowerISA v3.1 instructions:
vinsw: Vector Insert Word from GPR using immediate-specified index
vinsd: Vector Insert Doubleword from GPR using immediate-specified
       index

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-7-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:00 +03:00
+								/*
 								 * Insert from GPR, immediate-indexed: element size in bytes followed by
 								 * the helper, which is shared with the GPR-indexed left-index forms.
 								 */
 								TRANS(VINSW, do_vins_VX_uim4, 4, gen_helper_VINSWLX)
 								TRANS(VINSD, do_vins_VX_uim4, 8, gen_helper_VINSDLX)
-												target/ppc: Implement Vector Insert from VSR using GPR index insns

Implements the following PowerISA v3.1 instructions:
vinsbvlx: Vector Insert Byte from VSR using GPR-specified Left-Index
vinshvlx: Vector Insert Halfword from VSR using GPR-specified
          Left-Index
vinswvlx: Vector Insert Word from VSR using GPR-specified Left-Index
vinsbvrx: Vector Insert Byte from VSR using GPR-specified Right-Index
vinshvrx: Vector Insert Halfword from VSR using GPR-specified
          Right-Index
vinswvrx: Vector Insert Word from VSR using GPR-specified Right-Index

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-8-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:01 +03:00
+								/*
 								 * Insert from VSR, GPR-indexed.  As with the GPR-source forms, the
 								 * right-index (..VRX) variants reuse the left-index helpers with
 								 * right == true.
 								 */
 								TRANS(VINSBVLX, do_vinsvx_VX, 1, false, gen_helper_VINSBLX)
 								TRANS(VINSHVLX, do_vinsvx_VX, 2, false, gen_helper_VINSHLX)
 								TRANS(VINSWVLX, do_vinsvx_VX, 4, false, gen_helper_VINSWLX)
 								TRANS(VINSBVRX, do_vinsvx_VX, 1, true, gen_helper_VINSBLX)
 								TRANS(VINSHVRX, do_vinsvx_VX, 2, true, gen_helper_VINSHLX)
 								TRANS(VINSWVRX, do_vinsvx_VX, 4, true, gen_helper_VINSWLX)
-												target/ppc: Move vinsertb/vinserth/vinsertw/vinsertd to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-9-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:37:02 +03:00
+								/*
 								 * vinsert{b,h,w,d}: element size in bytes followed by the helper, which
 								 * is shared with the VINS?LX instructions.
 								 */
 								TRANS(VINSERTB, do_vinsert_VX_uim4, 1, gen_helper_VINSBLX)
 								TRANS(VINSERTH, do_vinsert_VX_uim4, 2, gen_helper_VINSHLX)
 								TRANS(VINSERTW, do_vinsert_VX_uim4, 4, gen_helper_VINSWLX)
 								TRANS(VINSERTD, do_vinsert_VX_uim4, 8, gen_helper_VINSDLX)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								/*
 								 * Translate vsldoi: raise a VPU exception when AltiVec is disabled,
 								 * otherwise call the vsldoi helper with pointers to the rD, rA and rB
 								 * vector registers and the VSH field of the opcode as a constant.
 								 */
 								static void gen_vsldoi(DisasContext *ctx)
 								{
 								    TCGv_ptr ra, rb, rd;
 								    TCGv_i32 sh;
 								    if (unlikely(!ctx->altivec_enabled)) {
 								        gen_exception(ctx, POWERPC_EXCP_VPU);
 								        return;
 								    }
 								    ra = gen_avr_ptr(rA(ctx->opcode));
 								    rb = gen_avr_ptr(rB(ctx->opcode));
 								    rd = gen_avr_ptr(rD(ctx->opcode));
 								    /* The shift amount is an immediate taken from the instruction. */
 								    sh = tcg_const_i32(VSH(ctx->opcode));
-												target/ppc: Style fixes for translate/vmx-impl.inc.c

Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Greg Kurz <groug@kaod.org>

											
										
										
											2019-03-21 15:47:02 +03:00
+								    gen_helper_vsldoi(rd, ra, rb, sh);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								    tcg_temp_free_ptr(ra);
 								    tcg_temp_free_ptr(rb);
 								    tcg_temp_free_ptr(rd);
 								    tcg_temp_free_i32(sh);
 								}
-												target/ppc: Implement vsldbi/vsrdbi instructions

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-5-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:36:58 +03:00
+								/*
 								 * VSLDBI: VRT receives VRA shifted left by a->sh bits, with the vacated
 								 * low-order bits filled from the top of VRB.  Only the two halves of VRA
 								 * and the half of VRB read with high == true are needed.  A zero shift
 								 * degenerates to a plain copy of VRA.
 								 */
 								static bool trans_VSLDBI(DisasContext *ctx, arg_VN *a)
 								{
 								    TCGv_i64 hi, lo;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    hi = tcg_temp_new_i64();
 								    lo = tcg_temp_new_i64();
 								    get_avr64(hi, a->vra, true);
 								    get_avr64(lo, a->vra, false);
 								    if (a->sh) {
 								        TCGv_i64 fill = tcg_temp_new_i64();
 								        get_avr64(fill, a->vrb, true);
 								        /* Each result half is a 64-bit window over two adjacent halves. */
 								        tcg_gen_extract2_i64(hi, lo, hi, 64 - a->sh);
 								        tcg_gen_extract2_i64(lo, fill, lo, 64 - a->sh);
 								        tcg_temp_free_i64(fill);
 								    }
 								    set_avr64(a->vrt, hi, true);
 								    set_avr64(a->vrt, lo, false);
 								    tcg_temp_free_i64(hi);
 								    tcg_temp_free_i64(lo);
 								    return true;
 								}
 								/*
 								 * VSRDBI: VRT receives VRB shifted right by a->sh bits, with the vacated
 								 * high-order bits filled from the bottom of VRA.  Only the two halves of
 								 * VRB and the half of VRA read with high == false are needed.  A zero
 								 * shift degenerates to a plain copy of VRB.
 								 */
 								static bool trans_VSRDBI(DisasContext *ctx, arg_VN *a)
 								{
 								    TCGv_i64 lo, hi;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    lo = tcg_temp_new_i64();
 								    hi = tcg_temp_new_i64();
 								    get_avr64(lo, a->vrb, false);
 								    get_avr64(hi, a->vrb, true);
 								    if (a->sh) {
 								        TCGv_i64 fill = tcg_temp_new_i64();
 								        get_avr64(fill, a->vra, false);
 								        /* Each result half is a 64-bit window over two adjacent halves. */
 								        tcg_gen_extract2_i64(lo, lo, hi, a->sh);
 								        tcg_gen_extract2_i64(hi, hi, fill, a->sh);
 								        tcg_temp_free_i64(fill);
 								    }
 								    set_avr64(a->vrt, lo, false);
 								    set_avr64(a->vrt, hi, true);
 								    tcg_temp_free_i64(lo);
 								    tcg_temp_free_i64(hi);
 								    return true;
 								}
-												target/ppc: Implement Vector Expand Mask

Implement the following PowerISA v3.1 instructions:
vexpandbm: Vector Expand Byte Mask
vexpandhm: Vector Expand Halfword Mask
vexpandwm: Vector Expand Word Mask
vexpanddm: Vector Expand Doubleword Mask
vexpandqm: Vector Expand Quadword Mask

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211203194229.746275-2-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2021-12-17 19:57:13 +03:00
+								/*
 								 * Vector Expand Mask for element size vece: every element of VRB is
 								 * arithmetically shifted right by (element width - 1), so each element of
 								 * VRT ends up all-ones or all-zeros according to the source sign bit.
 								 */
 								static bool do_vexpand(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
 								{
 								    /* Shift count that leaves only copies of the sign bit. */
 								    const unsigned sign_shift = (8 << vece) - 1;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_sari(vece, avr_full_offset(a->vrt), avr_full_offset(a->vrb),
 								                      sign_shift, 16, 16);
 								    return true;
 								}
 								/* Byte/halfword/word/doubleword forms differ only in the element size. */
 								TRANS(VEXPANDBM, do_vexpand, MO_8)
 								TRANS(VEXPANDHM, do_vexpand, MO_16)
 								TRANS(VEXPANDWM, do_vexpand, MO_32)
 								TRANS(VEXPANDDM, do_vexpand, MO_64)
 								/*
 								 * Quadword form of Vector Expand Mask: the top bit of the VRB half read
 								 * with high == true is replicated across 64 bits and written to both
 								 * halves of VRT.
 								 */
 								static bool trans_VEXPANDQM(DisasContext *ctx, arg_VX_tb *a)
 								{
 								    TCGv_i64 sign;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    sign = tcg_temp_new_i64();
 								    get_avr64(sign, a->vrb, true);
 								    /* Arithmetic shift by 63 yields 0 or -1. */
 								    tcg_gen_sari_i64(sign, sign, 63);
 								    set_avr64(a->vrt, sign, false);
 								    set_avr64(a->vrt, sign, true);
 								    tcg_temp_free_i64(sign);
 								    return true;
 								}
-												target/ppc: Implement Vector Extract Mask

Implement the following PowerISA v3.1 instructions:
vextractbm: Vector Extract Byte Mask
vextracthm: Vector Extract Halfword Mask
vextractwm: Vector Extract Word Mask
vextractdm: Vector Extract Doubleword Mask
vextractqm: Vector Extract Quadword Mask

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211203194229.746275-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2021-12-17 19:57:13 +03:00
+								/*
 								 * Vector Extract Mask for element size vece: collect the most significant
 								 * bit of every element of VRB into a bit mask and store it in GPR[vrt].
 								 * Each 64-bit half contributes elem_count_half bits; the bits from the
 								 * half read with high == true end up above those of the other half.
 								 */
 								static bool do_vextractm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
 								{
 								    /*
 								     * The sign-bit constant must be built in 64 bits: shifting a plain
 								     * int by 31 or 63 is undefined behaviour.
 								     */
 								    const uint64_t elem_width = 8 << vece, elem_count_half = 8 >> vece,
 								                   mask = dup_const(vece, 1ULL << (elem_width - 1));
 								    uint64_t i, j;
 								    TCGv_i64 lo, hi, t0, t1;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    hi = tcg_temp_new_i64();
 								    lo = tcg_temp_new_i64();
 								    t0 = tcg_temp_new_i64();
 								    t1 = tcg_temp_new_i64();
 								    get_avr64(lo, a->vrb, false);
 								    get_avr64(hi, a->vrb, true);
 								    tcg_gen_andi_i64(lo, lo, mask);
 								    tcg_gen_andi_i64(hi, hi, mask);
 								    /*
 								     * Gather the most significant bit of each element in the highest
 								     * element. E.g. for bytes:
 								     * aXXXXXXXbXXXXXXXcXXXXXXXdXXXXXXXeXXXXXXXfXXXXXXXgXXXXXXXhXXXXXXX
 								     *     & dup(1 << (elem_width - 1))
 								     * a0000000b0000000c0000000d0000000e0000000f0000000g0000000h0000000
 								     *     << 32 - 4
 								     * 0000e0000000f0000000g0000000h00000000000000000000000000000000000
 								     *     |
 								     * a000e000b000f000c000g000d000h000e0000000f0000000g0000000h0000000
 								     *     << 16 - 2
 								     * 00c000g000d000h000e0000000f0000000g0000000h000000000000000000000
 								     *     |
 								     * a0c0e0g0b0d0f0h0c0e0g000d0f0h000e0g00000f0h00000g0000000h0000000
 								     *     << 8 - 1
 								     * 0b0d0f0h0c0e0g000d0f0h000e0g00000f0h00000g0000000h00000000000000
 								     *     |
 								     * abcdefghbcdefgh0cdefgh00defgh000efgh0000fgh00000gh000000h0000000
 								     */
 								    for (i = elem_count_half / 2, j = 32; i > 0; i >>= 1, j >>= 1) {
 								        tcg_gen_shli_i64(t0, hi, j - i);
 								        tcg_gen_shli_i64(t1, lo, j - i);
 								        tcg_gen_or_i64(hi, hi, t0);
 								        tcg_gen_or_i64(lo, lo, t1);
 								    }
 								    /* Bring the gathered bits down and concatenate the two halves. */
 								    tcg_gen_shri_i64(hi, hi, 64 - elem_count_half);
 								    tcg_gen_extract2_i64(lo, lo, hi, 64 - elem_count_half);
 								    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], lo);
 								    tcg_temp_free_i64(hi);
 								    tcg_temp_free_i64(lo);
 								    tcg_temp_free_i64(t0);
 								    tcg_temp_free_i64(t1);
 								    return true;
 								}
 								/* Byte/halfword/word/doubleword forms differ only in the element size. */
 								TRANS(VEXTRACTBM, do_vextractm, MO_8)
 								TRANS(VEXTRACTHM, do_vextractm, MO_16)
 								TRANS(VEXTRACTWM, do_vextractm, MO_32)
 								TRANS(VEXTRACTDM, do_vextractm, MO_64)
 								/*
 								 * Quadword form of Vector Extract Mask: GPR[vrt] receives the single top
 								 * bit of the VRB half read with high == true, as 0 or 1.
 								 */
 								static bool trans_VEXTRACTQM(DisasContext *ctx, arg_VX_tb *a)
 								{
 								    TCGv_i64 msb;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    msb = tcg_temp_new_i64();
 								    get_avr64(msb, a->vrb, true);
 								    /* Logical shift leaves just bit 63 in bit 0. */
 								    tcg_gen_shri_i64(msb, msb, 63);
 								    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], msb);
 								    tcg_temp_free_i64(msb);
 								    return true;
 								}
-												target/ppc: Implement Vector Mask Move insns

Implement the following PowerISA v3.1 instructions:
mtvsrbm: Move to VSR Byte Mask
mtvsrhm: Move to VSR Halfword Mask
mtvsrwm: Move to VSR Word Mask
mtvsrdm: Move to VSR Doubleword Mask
mtvsrqm: Move to VSR Quadword Mask
mtvsrbmi: Move to VSR Byte Mask Immediate

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211203194229.746275-4-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2021-12-17 19:57:13 +03:00
+								/*
 								 * Move to VSR Mask for element size vece: the low-order bits of GPR[vrb]
 								 * are expanded so that each mask bit becomes one all-ones or all-zeros
 								 * element of VRT.  Each 64-bit half of VRT consumes elem_count_half bits;
 								 * the half written with high == true takes the upper group of bits.
 								 */
 								static bool do_mtvsrm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
 								{
 								    const uint64_t elem_width = 8 << vece, elem_count_half = 8 >> vece;
 								    /* Bit 0 of every element, and one element's worth of set bits. */
 								    const uint64_t lsb_each = dup_const(vece, 1);
 								    const uint64_t elem_ones = MAKE_64BIT_MASK(0, elem_width);
 								    int cnt, stride;
 								    TCGv_i64 hi, lo, sh_hi, sh_lo;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    hi = tcg_temp_new_i64();
 								    lo = tcg_temp_new_i64();
 								    sh_hi = tcg_temp_new_i64();
 								    sh_lo = tcg_temp_new_i64();
 								    /* sh_hi briefly holds the zero-extended GPR before the loop reuses it. */
 								    tcg_gen_extu_tl_i64(sh_hi, cpu_gpr[a->vrb]);
 								    tcg_gen_extract_i64(hi, sh_hi, elem_count_half, elem_count_half);
 								    tcg_gen_extract_i64(lo, sh_hi, 0, elem_count_half);
 								    /*
 								     * Spread the bits into their respective elements.
 								     * E.g. for bytes:
 								     * 00000000000000000000000000000000000000000000000000000000abcdefgh
 								     *   << 32 - 4
 								     * 0000000000000000000000000000abcdefgh0000000000000000000000000000
 								     *   |
 								     * 0000000000000000000000000000abcdefgh00000000000000000000abcdefgh
 								     *   << 16 - 2
 								     * 00000000000000abcdefgh00000000000000000000abcdefgh00000000000000
 								     *   |
 								     * 00000000000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh
 								     *   << 8 - 1
 								     * 0000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh0000000
 								     *   |
 								     * 0000000abcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgh
 								     *   & dup(1)
 								     * 0000000a0000000b0000000c0000000d0000000e0000000f0000000g0000000h
 								     *   * 0xff
 								     * aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
 								     */
 								    for (cnt = elem_count_half / 2, stride = 32; cnt > 0;
 								         cnt >>= 1, stride >>= 1) {
 								        tcg_gen_shli_i64(sh_hi, hi, stride - cnt);
 								        tcg_gen_shli_i64(sh_lo, lo, stride - cnt);
 								        tcg_gen_or_i64(hi, hi, sh_hi);
 								        tcg_gen_or_i64(lo, lo, sh_lo);
 								    }
 								    /* Keep one bit per element ... */
 								    tcg_gen_andi_i64(hi, hi, lsb_each);
 								    tcg_gen_andi_i64(lo, lo, lsb_each);
 								    /* ... and smear it across the whole element. */
 								    tcg_gen_muli_i64(hi, hi, elem_ones);
 								    tcg_gen_muli_i64(lo, lo, elem_ones);
 								    set_avr64(a->vrt, lo, false);
 								    set_avr64(a->vrt, hi, true);
 								    tcg_temp_free_i64(hi);
 								    tcg_temp_free_i64(lo);
 								    tcg_temp_free_i64(sh_hi);
 								    tcg_temp_free_i64(sh_lo);
 								    return true;
 								}
 								/* Byte/halfword/word/doubleword forms differ only in the element size. */
 								TRANS(MTVSRBM, do_mtvsrm, MO_8)
 								TRANS(MTVSRHM, do_mtvsrm, MO_16)
 								TRANS(MTVSRWM, do_mtvsrm, MO_32)
 								TRANS(MTVSRDM, do_mtvsrm, MO_64)
 								/*
 								 * Quadword form of Move to VSR Mask: bit 0 of GPR[vrb] is sign-extended
 								 * to 64 bits (0 or -1) and written to both halves of VRT.
 								 */
 								static bool trans_MTVSRQM(DisasContext *ctx, arg_VX_tb *a)
 								{
 								    TCGv_i64 fill;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    fill = tcg_temp_new_i64();
 								    tcg_gen_ext_tl_i64(fill, cpu_gpr[a->vrb]);
 								    /* A 1-bit signed extract at position 0 gives all-zeros or all-ones. */
 								    tcg_gen_sextract_i64(fill, fill, 0, 1);
 								    set_avr64(a->vrt, fill, false);
 								    set_avr64(a->vrt, fill, true);
 								    tcg_temp_free_i64(fill);
 								    return true;
 								}
 								/*
 								 * Move to VSR Byte Mask Immediate: the 16-bit immediate a->b is expanded
 								 * at translation time, one all-ones or all-zeros byte per bit, and the
 								 * two resulting 64-bit constants are stored into VRT.  Bits 8..15 feed
 								 * the half written with high == true, bits 0..7 the other half.
 								 */
 								static bool trans_MTVSRBMI(DisasContext *ctx, arg_DX_b *a)
 								{
 								    const uint64_t lsb_each = dup_const(MO_8, 1);
 								    uint64_t hi, lo;
 								    int cnt, stride;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    hi = extract16(a->b, 8, 8);
 								    lo = extract16(a->b, 0, 8);
 								    /* Spread the eight mask bits so that each lands in bit 0 of a byte. */
 								    for (cnt = 4, stride = 32; cnt > 0; cnt >>= 1, stride >>= 1) {
 								        const int sh = stride - cnt;
 								        hi |= hi << sh;
 								        lo |= lo << sh;
 								    }
 								    /* Isolate bit 0 of each byte, then widen it to the full byte. */
 								    hi = (hi & lsb_each) * 0xFF;
 								    lo = (lo & lsb_each) * 0xFF;
 								    set_avr64(a->vrt, tcg_constant_i64(hi), true);
 								    set_avr64(a->vrt, tcg_constant_i64(lo), false);
 								    return true;
 								}
-												target/ppc: implement vcntmb[bhwd]

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-18-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Vector Count Mask Bits for element size vece: count the elements of VRB
 								 * whose most significant bit equals a->mp, then place the count in
 								 * GPR[rt] shifted left by TARGET_LONG_BITS - 8 + vece.
 								 *
 								 * Like the other ISA 3.1 vector translators in this file, the instruction
 								 * is rejected on CPUs without ISA310 and requires the vector facility to
 								 * be enabled.
 								 */
 								static bool do_vcntmb(DisasContext *ctx, arg_VX_mp *a, int vece)
 								{
 								    TCGv_i64 rt, vrb, mask;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    rt = tcg_const_i64(0);
 								    vrb = tcg_temp_new_i64();
 								    /* Sign bit of every element. */
 								    mask = tcg_constant_i64(dup_const(vece, 1ULL << ((8 << vece) - 1)));
 								    for (int i = 0; i < 2; i++) {
 								        get_avr64(vrb, a->vrb, i);
 								        if (a->mp) {
 								            /* Keep the sign bits that are set. */
 								            tcg_gen_and_i64(vrb, mask, vrb);
 								        } else {
 								            /* Keep a bit for every sign bit that is clear. */
 								            tcg_gen_andc_i64(vrb, mask, vrb);
 								        }
 								        tcg_gen_ctpop_i64(vrb, vrb);
 								        tcg_gen_add_i64(rt, rt, vrb);
 								    }
 								    tcg_gen_shli_i64(rt, rt, TARGET_LONG_BITS - 8 + vece);
 								    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], rt);
 								    tcg_temp_free_i64(vrb);
 								    tcg_temp_free_i64(rt);
 								    return true;
 								}
 								/* Byte/halfword/word/doubleword forms differ only in the element size. */
 								TRANS(VCNTMBB, do_vcntmb, MO_8)
 								TRANS(VCNTMBH, do_vcntmb, MO_16)
 								TRANS(VCNTMBW, do_vcntmb, MO_32)
 								TRANS(VCNTMBD, do_vcntmb, MO_64)
-												target/ppc: implement vstri[bh][lr]

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-15-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Common translator for the vstri[bh][lr] instructions.  The helper gets
 								 * pointers to VRT and VRB and produces an i32 result; when the Rc bit is
 								 * set that result goes to CR field 6, otherwise it is computed into a
 								 * scratch temporary and dropped.
 								 */
 								static bool do_vstri(DisasContext *ctx, arg_VX_tb_rc *a,
 								                     void (*gen_helper)(TCGv_i32, TCGv_ptr, TCGv_ptr))
 								{
 								    TCGv_ptr dst, src;
 								    TCGv_i32 flags;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    dst = gen_avr_ptr(a->vrt);
 								    src = gen_avr_ptr(a->vrb);
 								    /* Without Rc the helper still needs somewhere to put its result. */
 								    flags = a->rc ? cpu_crf[6] : tcg_temp_new_i32();
 								    gen_helper(flags, dst, src);
 								    if (!a->rc) {
 								        tcg_temp_free_i32(flags);
 								    }
 								    tcg_temp_free_ptr(dst);
 								    tcg_temp_free_ptr(src);
 								    return true;
 								}
 								/* One helper per element size (byte/halfword) and direction (left/right). */
 								TRANS(VSTRIBL, do_vstri, gen_helper_VSTRIBL)
 								TRANS(VSTRIBR, do_vstri, gen_helper_VSTRIBR)
 								TRANS(VSTRIHL, do_vstri, gen_helper_VSTRIHL)
 								TRANS(VSTRIHR, do_vstri, gen_helper_VSTRIHR)
-												target/ppc: implement vclrrb

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-17-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/*
 								 * Common translator for vclrlb (right == false) and vclrrb
 								 * (right == true).  With N = GPR[vrb], VRT receives VRA ANDed with a
 								 * 128-bit mask covering min(N, 16) bytes: counted from the low-order end
 								 * of the vector for right == false, from the high-order end for
 								 * right == true.  mh masks the half accessed with high == true, ml the
 								 * other half.
 								 *
 								 * Like the other ISA 3.1 vector translators in this file, the instruction
 								 * is rejected on CPUs without ISA310 and requires the vector facility to
 								 * be enabled.
 								 */
 								static bool do_vclrb(DisasContext *ctx, arg_VX *a, bool right)
-												target/ppc: implement vclrlb

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-16-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								{
 								    TCGv_i64 rb, mh, ml, tmp,
 								             ones = tcg_constant_i64(-1),
 								             zero = tcg_constant_i64(0);
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    rb = tcg_temp_new_i64();
 								    mh = tcg_temp_new_i64();
 								    ml = tcg_temp_new_i64();
 								    tmp = tcg_temp_new_i64();
 								    tcg_gen_extu_tl_i64(rb, cpu_gpr[a->vrb]);
 								    /* tmp = 8 * (N % 8): bit count of the partially kept doubleword. */
 								    tcg_gen_andi_i64(tmp, rb, 7);
 								    tcg_gen_shli_i64(tmp, tmp, 3);
-												target/ppc: implement vclrrb

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-17-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    if (right) {
 								        tcg_gen_shr_i64(tmp, ones, tmp);
 								    } else {
 								        tcg_gen_shl_i64(tmp, ones, tmp);
 								    }
-												target/ppc: implement vclrlb

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-16-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    /* Invert so that tmp has ones exactly in the N % 8 bytes to keep. */
 								    tcg_gen_not_i64(tmp, tmp);
-												target/ppc: implement vclrrb

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-17-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    /*
 								     * N < 8: the partial mask applies to the first doubleword and the
 								     * other one is cleared.  8 <= N < 16: the first doubleword is kept
 								     * whole and the partial mask applies to the other.  N >= 16: keep all.
 								     */
 								    if (right) {
 								        tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(8),
 								                            tmp, ones);
 								        tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(8),
 								                            zero, tmp);
 								        tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(16),
 								                            ml, ones);
 								    } else {
 								        tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(8),
 								                            tmp, ones);
 								        tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(8),
 								                            zero, tmp);
 								        tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(16),
 								                            mh, ones);
 								    }
-												target/ppc: implement vclrlb

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-16-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								    /* Apply the two mask halves to VRA and store the result in VRT. */
 								    get_avr64(tmp, a->vra, true);
 								    tcg_gen_and_i64(tmp, tmp, mh);
 								    set_avr64(a->vrt, tmp, true);
 								    get_avr64(tmp, a->vra, false);
 								    tcg_gen_and_i64(tmp, tmp, ml);
 								    set_avr64(a->vrt, tmp, false);
 								    tcg_temp_free_i64(rb);
 								    tcg_temp_free_i64(mh);
 								    tcg_temp_free_i64(ml);
 								    tcg_temp_free_i64(tmp);
 								    return true;
 								}
-												target/ppc: implement vclrrb

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-17-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								/* The trailing argument is do_vclrb()'s "right" flag. */
 								TRANS(VCLRLB, do_vclrb, false)
 								TRANS(VCLRRB, do_vclrb, true)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								/*
 								 * GEN_VAFORM_PAIRED(name0, name1, opc2) defines gen_<name0>_<name1>(), a
 								 * translator shared by two instructions that are told apart by the Rc
 								 * bit of the opcode: helper name1 is called when Rc is set, helper name0
 								 * otherwise.  Both helpers receive cpu_env and pointers to the rD, rA, rB
 								 * and rC vector registers.  A VPU exception is raised instead when
 								 * AltiVec is disabled.  The opc2 argument is not used by the macro body.
 								 */
 								#define GEN_VAFORM_PAIRED(name0, name1, opc2)                           \
 								static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
 								    {                                                                   \
 								        TCGv_ptr ra, rb, rc, rd;                                        \
 								        if (unlikely(!ctx->altivec_enabled)) {                          \
 								            gen_exception(ctx, POWERPC_EXCP_VPU);                       \
 								            return;                                                     \
 								        }                                                               \
 								        ra = gen_avr_ptr(rA(ctx->opcode));                              \
 								        rb = gen_avr_ptr(rB(ctx->opcode));                              \
 								        rc = gen_avr_ptr(rC(ctx->opcode));                              \
 								        rd = gen_avr_ptr(rD(ctx->opcode));                              \
 								        if (Rc(ctx->opcode)) {                                          \
 								            gen_helper_##name1(cpu_env, rd, ra, rb, rc);                \
 								        } else {                                                        \
 								            gen_helper_##name0(cpu_env, rd, ra, rb, rc);                \
 								        }                                                               \
 								        tcg_temp_free_ptr(ra);                                          \
 								        tcg_temp_free_ptr(rb);                                          \
 								        tcg_temp_free_ptr(rc);                                          \
 								        tcg_temp_free_ptr(rd);                                          \
 								    }
 								/* Defines gen_vmhaddshs_vmhraddshs(). */
 								GEN_VAFORM_PAIRED(vmhaddshs, vmhraddshs, 16)
 								/*
 								 * Translate vmladduhm: raise a VPU exception when AltiVec is disabled,
 								 * otherwise call the vmladduhm helper with pointers to the rD, rA, rB and
 								 * rC vector registers named by the opcode.
 								 */
 								static void gen_vmladduhm(DisasContext *ctx)
 								{
 								    TCGv_ptr va, vb, vc, vd;
 								    if (unlikely(!ctx->altivec_enabled)) {
 								        gen_exception(ctx, POWERPC_EXCP_VPU);
 								        return;
 								    }
 								    va = gen_avr_ptr(rA(ctx->opcode));
 								    vb = gen_avr_ptr(rB(ctx->opcode));
 								    vc = gen_avr_ptr(rC(ctx->opcode));
 								    vd = gen_avr_ptr(rD(ctx->opcode));
 								    /* Destination first, then the three sources. */
 								    gen_helper_vmladduhm(vd, va, vb, vc);
 								    tcg_temp_free_ptr(va);
 								    tcg_temp_free_ptr(vb);
 								    tcg_temp_free_ptr(vc);
 								    tcg_temp_free_ptr(vd);
 								}
-												target/ppc: introduce do_va_helper

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-10-matheus.ferst@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:26 +03:00
+								/*
 								 * Generic translator for VA-form instructions implemented by a helper
 								 * that takes four vector register pointers (vrt, vra, vrb, vrc).  Only
 								 * the vector-enabled check is done here; no ISA flag is tested in this
 								 * function (NOTE(review): presumably callers do that where needed --
 								 * confirm).  Always returns true once the helper call has been emitted.
 								 */
 								static bool do_va_helper(DisasContext *ctx, arg_VA *a,
 								    void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
-												target-ppc: add vector permute right indexed instruction

Add vpermr instruction from ISA 3.0.

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-06 08:04:10 +03:00
+								{
-												target/ppc: Move vsel and vperm/vpermr to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-29-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    TCGv_ptr vrt, vra, vrb, vrc;
 								    REQUIRE_VECTOR(ctx);
 								    vrt = gen_avr_ptr(a->vrt);
 								    vra = gen_avr_ptr(a->vra);
 								    vrb = gen_avr_ptr(a->vrb);
 								    /* The third source register number lives in the a->rc field. */
 								    vrc = gen_avr_ptr(a->rc);
-												target/ppc: introduce do_va_helper

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-10-matheus.ferst@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:26 +03:00
+								    gen_helper(vrt, vra, vrb, vrc);
-												target/ppc: Move vsel and vperm/vpermr to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-29-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								    tcg_temp_free_ptr(vrt);
 								    tcg_temp_free_ptr(vra);
 								    tcg_temp_free_ptr(vrb);
 								    tcg_temp_free_ptr(vrc);
 								    return true;
 								}
-												target/ppc: introduce do_va_helper

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-10-matheus.ferst@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:26 +03:00
+								TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
 								TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)
-												target/ppc: Move vsel and vperm/vpermr to decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-29-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								/*
 								 * Translate VSEL (Vector Select) as a single generic-vector bit-select
 								 * operation over the whole vector register.
 								 *
 								 * @ctx: translation context
 								 * @a:   decoded VA-form arguments; rc supplies the selector mask, vrb and
 								 *       vra the two data sources and vrt the destination
 								 *
 								 * Per the TCG gvec API, tcg_gen_gvec_bitsel(vece, d, a, b, c, oprsz,
 								 * maxsz) takes bits from b where a is set and from c where it is clear,
 								 * so VRT receives VRB bits where VRC is 1 and VRA bits where VRC is 0.
 								 *
 								 * Returns true once the operation has been emitted.  The REQUIRE_*
 								 * macros are defined elsewhere and presumably leave the function early
 								 * when the instruction or the vector facility is unavailable.
 								 */
 								static bool trans_VSEL(DisasContext *ctx, arg_VA *a)
 								{
 								    REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
 								    REQUIRE_VECTOR(ctx);

 								    /*
 								     * Both the operation size and the maximum size are the full
 								     * 16-byte vector register; the operation-size argument had been
 								     * dropped from the call, leaving it syntactically invalid.
 								     */
 								    tcg_gen_gvec_bitsel(MO_64, avr_full_offset(a->vrt), avr_full_offset(a->rc),
 								                        avr_full_offset(a->vrb), avr_full_offset(a->vra),
 								                        16, 16);
 								    return true;
-												target-ppc: add vector permute right indexed instruction

Add vpermr instruction from ISA 3.0.

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-06 08:04:10 +03:00
+								}
-												target/ppc: declare vmsum[um]bm helpers with call flags

Move vmsumubm and vmsummbm to decodetree, declare both helpers with
TCG_CALL_NO_RWG, and drop the unused env argument.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-11-matheus.ferst@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:27 +03:00
+								TRANS_FLAGS(ALTIVEC, VMSUMUBM, do_va_helper, gen_helper_VMSUMUBM)
 								TRANS_FLAGS(ALTIVEC, VMSUMMBM, do_va_helper, gen_helper_VMSUMMBM)
-												target/ppc: declare vmsumsh[ms] helper with call flags

Move vmsumshm and vmsumshs to decodetree, declare vmsumshm helper with
TCG_CALL_NO_RWG, and drop the unused env argument.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-13-matheus.ferst@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:29 +03:00
+								TRANS_FLAGS(ALTIVEC, VMSUMSHM, do_va_helper, gen_helper_VMSUMSHM)
-												target/ppc: declare vmsumuh[ms] helper with call flags

Move vmsumuhm and vmsumuhs to decodetree, declare vmsumuhm helper with
TCG_CALL_NO_RWG, and drop the unused env argument.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-12-matheus.ferst@eldorado.org.br>
[danielhb: added #undef VMSUMUHM to fix ppc64 build]
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:28 +03:00
+								TRANS_FLAGS(ALTIVEC, VMSUMUHM, do_va_helper, gen_helper_VMSUMUHM)
 								/*
 								 * Common translator for VA-form instructions whose helper needs the CPU
 								 * environment in addition to the four vector-register pointers.
 								 *
 								 * @ctx:        translation context
 								 * @a:          decoded VA-form arguments (vrt, vra, vrb, rc are read)
 								 * @gen_helper: helper generator, invoked as
 								 *              gen_helper(cpu_env, vrt, vra, vrb, vrc)
 								 *
 								 * Returns true after the helper call has been emitted.  REQUIRE_VECTOR()
 								 * is defined elsewhere; it presumably exits early when the vector
 								 * facility is disabled.
 								 */
 								static bool do_va_env_helper(DisasContext *ctx, arg_VA *a,
 								    void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
 								{
 								    TCGv_ptr dst, src_a, src_b, src_c;

 								    REQUIRE_VECTOR(ctx);

 								    /* Materialise pointers to the target and the three sources. */
 								    dst = gen_avr_ptr(a->vrt);
 								    src_a = gen_avr_ptr(a->vra);
 								    src_b = gen_avr_ptr(a->vrb);
 								    src_c = gen_avr_ptr(a->rc);

 								    gen_helper(cpu_env, dst, src_a, src_b, src_c);

 								    /* Release the temporaries in the order they were created. */
 								    tcg_temp_free_ptr(dst);
 								    tcg_temp_free_ptr(src_a);
 								    tcg_temp_free_ptr(src_b);
 								    tcg_temp_free_ptr(src_c);

 								    return true;
 								}
 								TRANS_FLAGS(ALTIVEC, VMSUMUHS, do_va_env_helper, gen_helper_VMSUMUHS)
-												target/ppc: declare vmsumsh[ms] helper with call flags

Move vmsumshm and vmsumshs to decodetree, declare vmsumshm helper with
TCG_CALL_NO_RWG, and drop the unused env argument.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-13-matheus.ferst@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:29 +03:00
+								TRANS_FLAGS(ALTIVEC, VMSUMSHS, do_va_env_helper, gen_helper_VMSUMSHS)
-												target/ppc: declare vmsum[um]bm helpers with call flags

Move vmsumubm and vmsummbm to decodetree, declare both helpers with
TCG_CALL_NO_RWG, and drop the unused env argument.

Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220517123929.284511-11-matheus.ferst@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>

											
										
										
											2022-05-17 15:39:27 +03:00
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
 								GEN_VXFORM_NOA(vclzb, 1, 28)
 								GEN_VXFORM_NOA(vclzh, 1, 29)
-												target/ppc: Optimize emulation of vclzw instruction

Optimize Altivec instruction vclzw (Vector Count Leading Zeros Word).
This instruction counts the number of leading zeros of each word element
in source register and places result in the appropriate word element of
destination register.

Counting is performed in four iterations of a for loop (one for each
word element of source register vB). Every iteration consists of loading
appropriate word element from source register, counting leading zeros
with tcg_gen_clzi_i32, and saving the result in appropriate word element
of destination register.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-7-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:52 +03:00
+								GEN_VXFORM_TRANS(vclzw, 1, 30)
-												target/ppc: Optimize emulation of vclzd instruction

Optimize Altivec instruction vclzd (Vector Count Leading Zeros Doubleword).
This instruction counts the number of leading zeros of each doubleword element
in source register and places result in the appropriate doubleword element of
destination register.

The implementation uses TCG's count-leading-zeros operation twice (once for
each doubleword element of source register vB) and places each result in the
appropriate doubleword element of destination register vD.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-6-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:51 +03:00
+								GEN_VXFORM_TRANS(vclzd, 1, 31)
-												target-ppc: implement vnegw/d instructions

Vector Integer Negate Instructions:

vnegw: Vector Negate Word
vnegd: Vector Negate Doubleword

Signed-off-by: Nikunj A Dadhania <nikunj@linux.vnet.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-10-19 09:06:46 +03:00
+								GEN_VXFORM_NOA_2(vnegw, 1, 24, 6)
 								GEN_VXFORM_NOA_2(vnegd, 1, 24, 7)
-												target/ppc: Move vexts[bhw]2[wd] to decodetree

Move the following instructions to decodetree:
vextsb2w: Vector Extend Sign Byte To Word
vextsh2w: Vector Extend Sign Halfword To Word
vextsb2d: Vector Extend Sign Byte To Doubleword
vextsh2d: Vector Extend Sign Halfword To Doubleword
vextsw2d: Vector Extend Sign Word To Doubleword

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Coutinho <lucas.coutinho@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-8-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								/*
 								 * 64-bit scalar expansion for the vexts* translators: sign-extend the
 								 * low (64 - s) bits of @b into @t.
 								 */
 								static void gen_vexts_i64(TCGv_i64 t, TCGv_i64 b, int64_t s)
 								{
 								    /* Width of the source field, counted from bit 0. */
 								    const unsigned int field_bits = 64 - s;

 								    tcg_gen_sextract_i64(t, b, 0, field_bits);
 								}
 								/*
 								 * 32-bit scalar expansion for the vexts* translators: sign-extend the
 								 * low (32 - s) bits of @b into @t.
 								 */
 								static void gen_vexts_i32(TCGv_i32 t, TCGv_i32 b, int32_t s)
 								{
 								    /* Width of the source field, counted from bit 0. */
 								    const unsigned int field_bits = 32 - s;

 								    tcg_gen_sextract_i32(t, b, 0, field_bits);
 								}
 								/*
 								 * Host-vector expansion for the vexts* translators.  Each element of @b
 								 * (element size @vece) is shifted left by @s bits into @t and then
 								 * shifted right arithmetically by the same amount, which sign-extends
 								 * the low (element width - s) bits of every element.
 								 */
 								static void gen_vexts_vec(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t s)
 								{
 								    /* Move the sign bit of the narrow field to the top of the element. */
 								    tcg_gen_shli_vec(vece, t, b, s);
 								    /* Shift back down, replicating that sign bit. */
 								    tcg_gen_sari_vec(vece, t, t, s);
 								}
 								/*
 								 * Common translator for the vexts* (Vector Extend Sign) instructions.
 								 *
 								 * @ctx:  translation context
 								 * @a:    decoded VX-form arguments; vrb is the source, vrt the destination
 								 * @vece: destination element size, MO_32 or MO_64; it selects the entry
 								 *        of op[] below via op[vece - MO_32]
 								 * @s:    number of high bits of each element to overwrite with the sign,
 								 *        i.e. element width minus source field width
 								 *
 								 * Returns true once the generic-vector operation has been emitted.  The
 								 * REQUIRE_* macros are defined elsewhere and presumably leave the
 								 * function early when ISA 3.00 or the vector facility is unavailable.
 								 */
 								static bool do_vexts(DisasContext *ctx, arg_VX_tb *a, unsigned vece, int64_t s)
 								{
 								    /* Vector opcodes used by gen_vexts_vec(). */
 								    static const TCGOpcode vecop_list[] = {
 								        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
 								    };

 								    /* Index 0 handles word elements, index 1 doubleword elements. */
 								    static const GVecGen2i op[2] = {
 								        {
 								            .fni4 = gen_vexts_i32,
 								            .fniv = gen_vexts_vec,
 								            .opt_opc = vecop_list,
 								            .vece = MO_32
 								        },
 								        {
 								            .fni8 = gen_vexts_i64,
 								            .fniv = gen_vexts_vec,
 								            .opt_opc = vecop_list,
 								            .vece = MO_64
 								        },
 								    };

 								    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
 								    REQUIRE_VECTOR(ctx);

 								    /*
 								     * tcg_gen_gvec_2i() takes (dofs, aofs, oprsz, maxsz, c, op).  Both
 								     * sizes are the full 16-byte vector register; the operation-size
 								     * argument had been dropped from the call, leaving it invalid.
 								     */
 								    tcg_gen_gvec_2i(avr_full_offset(a->vrt), avr_full_offset(a->vrb),
 								                    16, 16, s, &op[vece - MO_32]);
 								    return true;
 								}
 								/*
 								 * Instantiate the vexts* translators on top of do_vexts().  The last
 								 * argument is the shift count s = element width - source width:
 								 * 32 - 8, 32 - 16, 64 - 8, 64 - 16 and 64 - 32 respectively.
 								 * TRANS() is defined elsewhere; presumably it emits trans_<NAME>()
 								 * forwarding to do_vexts() with these extra arguments (TODO confirm).
 								 */
 								TRANS(VEXTSB2W, do_vexts, MO_32, 24);
 								TRANS(VEXTSH2W, do_vexts, MO_32, 16);
 								TRANS(VEXTSB2D, do_vexts, MO_64, 56);
 								TRANS(VEXTSH2D, do_vexts, MO_64, 48);
 								TRANS(VEXTSW2D, do_vexts, MO_64, 32);
-												target/ppc: Implement vextsd2q

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Coutinho <lucas.coutinho@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-9-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								static bool trans_VEXTSD2Q(DisasContext *ctx, arg_VX_tb *a)
 								{
 								    TCGv_i64 tmp;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tmp = tcg_temp_new_i64();
 								    get_avr64(tmp, a->vrb, false);
 								    set_avr64(a->vrt, tmp, false);
 								    tcg_gen_sari_i64(tmp, tmp, 63);
 								    set_avr64(a->vrt, tmp, true);
 								    tcg_temp_free_i64(tmp);
 								    return true;
 								}
-												target-ppc: add vector count trailing zeros instructions

The following vector count trailing zeros instructions are
added from ISA 3.0.

vctzb - Vector Count Trailing Zeros Byte
vctzh - Vector Count Trailing Zeros Halfword
vctzw - Vector Count Trailing Zeros Word
vctzd - Vector Count Trailing Zeros Doubleword

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-06 08:04:08 +03:00
+								GEN_VXFORM_NOA_2(vctzb, 1, 24, 28)
 								GEN_VXFORM_NOA_2(vctzh, 1, 24, 29)
 								GEN_VXFORM_NOA_2(vctzw, 1, 24, 30)
 								GEN_VXFORM_NOA_2(vctzd, 1, 24, 31)
-												target-ppc: add vclzlsbb/vctzlsbb instructions

The following vector instructions are added from ISA 3.0.

vclzlsbb - Vector Count Leading Zero Least-Significant Bits Byte
vctzlsbb - Vector Count Trailing Zero Least-Significant Bits Byte

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-28 08:45:18 +03:00
+								GEN_VXFORM_NOA_3(vclzlsbb, 1, 24, 0)
 								GEN_VXFORM_NOA_3(vctzlsbb, 1, 24, 1)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_VXFORM_NOA(vpopcntb, 1, 28)
 								GEN_VXFORM_NOA(vpopcnth, 1, 29)
 								GEN_VXFORM_NOA(vpopcntw, 1, 30)
 								GEN_VXFORM_NOA(vpopcntd, 1, 31)
 								GEN_VXFORM_DUAL(vclzb, PPC_NONE, PPC2_ALTIVEC_207, \
 								                vpopcntb, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM_DUAL(vclzh, PPC_NONE, PPC2_ALTIVEC_207, \
 								                vpopcnth, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM_DUAL(vclzw, PPC_NONE, PPC2_ALTIVEC_207, \
 								                vpopcntw, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
 								                vpopcntd, PPC_NONE, PPC2_ALTIVEC_207)
-												target-ppc: add vector bit permute doubleword instruction

Add vbpermd instruction from ISA 3.0.

Signed-off-by: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-09-06 08:04:09 +03:00
+								GEN_VXFORM(vbpermd, 6, 23);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_VXFORM(vbpermq, 6, 21);
-												target/ppc: Optimize emulation of vgbbd instruction

Optimize altivec instruction vgbbd (Vector Gather Bits by Bytes by Doubleword)
All ith bits (i in range 1 to 8) of each byte of doubleword element in
source register are concatenated and placed into ith byte of appropriate
doubleword element in destination register.

Following solution is done for both doubleword elements of source register
in parallel, in order to reduce the number of instructions needed(that's why
arrays are used):
First, both doubleword elements of source register vB are placed in
appropriate element of array avr. Bits are gathered in 2x8 iterations(2 for
loops). In first iteration bit 1 of byte 1, bit 2 of byte 2,... bit 8 of
byte 8 are in their final spots so avr[i], i={0,1} can be and-ed with
tcg_mask. For every following iteration, both avr[i] and tcg_mask variables
have to be shifted right for 7 and 8 places, respectively, in order to get
bit 1 of byte 2, bit 2 of byte 3.. bit 7 of byte 8 in their final spots so
shifted avr values(saved in tmp) can be and-ed with new value of tcg_mask...
After the first 8 iterations (first loop), all the first bits are in their final
places, all second bits except the second bit of the eighth byte are in their
places, and so on (only one eighth bit, from the eighth byte, is in its place).
In the second loop we do all
operations symmetrically, in order to get other half of bits in their final
spots. Results for first and second doubleword elements are saved in
result[0] and result[1] respectively. In the end those results are saved in
appropriate doubleword element of destination register vD.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-5-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2019-07-15 17:22:50 +03:00
+								GEN_VXFORM_TRANS(vgbbd, 6, 20);
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_VXFORM(vpmsumb, 4, 16)
 								GEN_VXFORM(vpmsumh, 4, 17)
 								GEN_VXFORM(vpmsumw, 4, 18)
 								GEN_VXFORM(vpmsumd, 4, 19)
 								/*
 								 * GEN_BCD(op): define gen_<op>(), the translator for a BCD instruction
 								 * with two vector sources.  The generated function passes pointers to
 								 * vD, vA and vB plus a 0/1 constant taken from opcode bit 0x200
 								 * (presumably the PS field) to gen_helper_<op>(), which writes its
 								 * result to cpu_crf[6].  If AltiVec is disabled, a POWERPC_EXCP_VPU
 								 * exception is generated instead.
 								 */
 								#define GEN_BCD(op)                                       \
 								static void gen_##op(DisasContext *ctx)                   \
 								{                                                         \
 								    if (unlikely(!ctx->altivec_enabled)) {                \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);             \
 								    } else {                                              \
 								        TCGv_ptr src_a = gen_avr_ptr(rA(ctx->opcode));    \
 								        TCGv_ptr src_b = gen_avr_ptr(rB(ctx->opcode));    \
 								        TCGv_ptr dst = gen_avr_ptr(rD(ctx->opcode));      \
 								        TCGv_i32 ps_bit =                                 \
 								            tcg_const_i32((ctx->opcode & 0x200) != 0);    \
 								                                                          \
 								        gen_helper_##op(cpu_crf[6], dst, src_a, src_b,    \
 								                        ps_bit);                          \
 								                                                          \
 								        tcg_temp_free_ptr(src_a);                         \
 								        tcg_temp_free_ptr(src_b);                         \
 								        tcg_temp_free_ptr(dst);                           \
 								        tcg_temp_free_i32(ps_bit);                        \
 								    }                                                     \
 								}
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least significant nibble is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
+								#define GEN_BCD2(op)                                \
 								static void gen_##op(DisasContext *ctx)             \
 								{                                                   \
 								    TCGv_ptr rd, rb;                                \
 								    TCGv_i32 ps;                                    \
 								                                                    \
 								    if (unlikely(!ctx->altivec_enabled)) {          \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);       \
 								        return;                                     \
 								    }                                               \
 								                                                    \
 								    rb = gen_avr_ptr(rB(ctx->opcode));              \
 								    rd = gen_avr_ptr(rD(ctx->opcode));              \
 								                                                    \
 								    ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
 								                                                    \
 								    gen_helper_##op(cpu_crf[6], rd, rb, ps);        \
 								                                                    \
 								    tcg_temp_free_ptr(rb);                          \
 								    tcg_temp_free_ptr(rd);                          \
 								    tcg_temp_free_i32(ps);                          \
 								}
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								GEN_BCD(bcdadd)
 								GEN_BCD(bcdsub)
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
+								GEN_BCD2(bcdcfn)
-												target-ppc: Implement bcdctn. instruction

bcdctn. converts from BCD to National numeric format. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:23 +03:00
+								GEN_BCD2(bcdctn)
-												target-ppc: Implement bcdcfz. instruction

bcdcfz. converts from Zoned numeric format to BCD. Zoned format uses
a byte to represent a digit where the most significant nibble is 0x3
or 0xf, depending on the preferred sign.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:24 +03:00
+								GEN_BCD2(bcdcfz)
-												target-ppc: Implement bcdctz. instruction

bcdctz. converts from BCD to Zoned numeric format. Zoned format uses
a byte to represent a digit where the most significant nibble is 0x3
or 0xf, depending on the preferred signal.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:25 +03:00
+								GEN_BCD2(bcdctz)
-												target-ppc: Implement bcdcfsq. instruction

bcdcfsq.: Decimal convert from signed quadword. It is not possible
to convert values less than -10^31-1 or greater than 10^31-1 to be
represented in packed decimal format.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
[dwg: Corrected constant which should be 10^16-1 but was 10^17-1]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:30 +03:00
+								GEN_BCD2(bcdcfsq)
-												target-ppc: Implement bcdctsq. instruction

bcdctsq.: Decimal convert to signed quadword. It is possible to
convert packed decimal values to signed quadwords.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:31 +03:00
+								GEN_BCD2(bcdctsq)
-												target-ppc: Implement bcdsetsgn. instruction

bcdsetsgn.: Decimal set sign. This instruction copies the register
value to the result register but adjusts the sign according to
the preferred sign value.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:33 +03:00
+								GEN_BCD2(bcdsetsgn)
-												target-ppc: Implement bcdcpsgn. instruction

bcdcpsgn.: Decimal copy sign. Given two registers vra and vrb, it
copies the vra value with vrb sign to the result register vrt.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:32 +03:00
+								GEN_BCD(bcdcpsgn);
-												ppc: Implement bcds. instruction

bcds.: Decimal shift. Given two registers vra and vrb, this instruction
shifts the vrb value by vra bits into the result register.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-10 05:10:10 +03:00
+								GEN_BCD(bcds);
-												ppc: Implement bcdus. instruction

bcdus.: Decimal unsigned shift. This instruction works like bcds. but
considers only unsigned BCDs (no sign in the least significant 4 bits).

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-10 05:10:11 +03:00
+								GEN_BCD(bcdus);
-												ppc: Implement bcdsr. instruction

bcdsr.: Decimal shift and round. This instruction works like bcds.
however, when performing right shift, 1 will be added to the
result if the last digit was >= 5.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-10 05:10:12 +03:00
+								GEN_BCD(bcdsr);
-												ppc: Implement bcdtrunc. instruction

bcdtrunc.: Decimal integer truncate. Given a BCD number in vrb and the
number of bytes to truncate in vra, the return register will have vrb
with such bits truncated.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-12 23:08:32 +03:00
+								GEN_BCD(bcdtrunc);
-												ppc: Implement bcdutrunc. instruction

bcdutrunc. Decimal unsigned truncate. Works like bcdtrunc. with
unsigned BCD numbers.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-12 23:08:33 +03:00
+								GEN_BCD(bcdutrunc);
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least significant nibble is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
 								static void gen_xpnd04_1(DisasContext *ctx)
 								{
 								    switch (opc4(ctx->opcode)) {
-												target-ppc: Implement bcdctsq. instruction

bcdctsq.: Decimal convert to signed quadword. It is possible to
convert packed decimal values to signed quadwords.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:31 +03:00
+								    case 0:
 								        gen_bcdctsq(ctx);
 								        break;
-												target-ppc: Implement bcdcfsq. instruction

bcdcfsq.: Decimal convert from signed quadword. It is not possible
to convert values less than -10^31-1 or greater than 10^31-1 to be
represented in packed decimal format.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
[dwg: Corrected constant which should be 10^16-1 but was 10^17-1]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:30 +03:00
+								    case 2:
 								        gen_bcdcfsq(ctx);
 								        break;
-												target-ppc: Implement bcdctz. instruction

bcdctz. converts from BCD to Zoned numeric format. Zoned format uses
a byte to represent a digit where the most significant nibble is 0x3
or 0xf, depending on the preferred sign.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:25 +03:00
+								    case 4:
 								        gen_bcdctz(ctx);
 								        break;
-												target-ppc: Implement bcdctn. instruction

bcdctn. converts from BCD to National numeric format. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:23 +03:00
+								    case 5:
 								        gen_bcdctn(ctx);
 								        break;
-												target-ppc: Implement bcdcfz. instruction

bcdcfz. converts from Zoned numeric format to BCD. Zoned format uses
a byte to represent a digit where the most significant nibble is 0x3
or 0xf, depending on the preferred signal.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:24 +03:00
+								    case 6:
 								        gen_bcdcfz(ctx);
 								        break;
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
+								    case 7:
 								        gen_bcdcfn(ctx);
 								        break;
-												target-ppc: Implement bcdsetsgn. instruction

bcdsetsgn.: Decimal set sign. This instruction copies the register
value to the result register but adjusts the sign according to
the preferred sign value.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:33 +03:00
+								    case 31:
 								        gen_bcdsetsgn(ctx);
 								        break;
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
+								    default:
 								        gen_invalid(ctx);
 								        break;
 								    }
 								}
 								static void gen_xpnd04_2(DisasContext *ctx)
 								{
 								    switch (opc4(ctx->opcode)) {
-												target-ppc: Implement bcdctsq. instruction

bcdctsq.: Decimal convert to signed quadword. It is possible to
convert packed decimal values to signed quadwords.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:31 +03:00
+								    case 0:
 								        gen_bcdctsq(ctx);
 								        break;
-												target-ppc: Implement bcdcfsq. instruction

bcdcfsq.: Decimal convert from signed quadword. It is not possible
to convert values less than -10^31-1 or greater than 10^31-1 to be
represented in packed decimal format.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
[dwg: Corrected constant which should be 10^16-1 but was 10^17-1]
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:30 +03:00
+								    case 2:
 								        gen_bcdcfsq(ctx);
 								        break;
-												target-ppc: Implement bcdctz. instruction

bcdctz. converts from BCD to Zoned numeric format. Zoned format uses
a byte to represent a digit where the most significant nibble is 0x3
or 0xf, depending on the preferred signal.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:25 +03:00
+								    case 4:
 								        gen_bcdctz(ctx);
 								        break;
-												target-ppc: Implement bcdcfz. instruction

bcdcfz. converts from Zoned numeric format to BCD. Zoned format uses
a byte to represent a digit where the most significant nibble is 0x3
or 0xf, depending on the preferred signal.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:24 +03:00
+								    case 6:
 								        gen_bcdcfz(ctx);
 								        break;
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
+								    case 7:
 								        gen_bcdcfn(ctx);
 								        break;
-												target-ppc: Implement bcdsetsgn. instruction

bcdsetsgn.: Decimal set sign. This instruction copies the register
value to the result register but adjust the signal according to
the preferred sign value.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:33 +03:00
+								    case 31:
 								        gen_bcdsetsgn(ctx);
 								        break;
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
+								    default:
 								        gen_invalid(ctx);
 								        break;
 								    }
 								}
-												target-ppc: Implement bcdsetsgn. instruction

bcdsetsgn.: Decimal set sign. This instruction copies the register
value to the result register but adjust the signal according to
the preferred sign value.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:33 +03:00
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least sign. nibbles is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
+								GEN_VXFORM_DUAL(vsubcuw, PPC_ALTIVEC, PPC_NONE, \
 								                xpnd04_1, PPC_NONE, PPC2_ISA300)
 								GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \
 								                xpnd04_2, PPC_NONE, PPC2_ISA300)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \
 								                bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \
 								                bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \
 								                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \
 								                bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
-												target-ppc: Implement bcdcpsgn. instruction

bcdcpsgn.: Decimal copy sign. Given two registers vra and vrb, it
copies the vra value with vrb sign to the result register vrt.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-25 06:53:32 +03:00
+								GEN_VXFORM_DUAL(vaddshs, PPC_ALTIVEC, PPC_NONE, \
 								                bcdcpsgn, PPC_NONE, PPC2_ISA300)
-												ppc: Implement bcds. instruction

bcds.: Decimal shift. Given two registers vra and vrb, this instruction
shift the vrb value by vra bits into the result register.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-10 05:10:10 +03:00
+								GEN_VXFORM_DUAL(vsubudm, PPC2_ALTIVEC_207, PPC_NONE, \
 								                bcds, PPC_NONE, PPC2_ISA300)
-												ppc: Implement bcdus. instruction

bcdus.: Decimal unsigned shift. This instruction works like bcds. but
considers only unsigned BCDs (no sign in least meaning 4 bits).

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-10 05:10:11 +03:00
+								GEN_VXFORM_DUAL(vsubuwm, PPC_ALTIVEC, PPC_NONE, \
 								                bcdus, PPC_NONE, PPC2_ISA300)
-												ppc: Implement bcdtrunc. instruction

bcdtrunc.: Decimal integer truncate. Given a BCD number in vrb and the
number of bytes to truncate in vra, the return register will have vrb
with such bits truncated.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-12 23:08:32 +03:00
+								GEN_VXFORM_DUAL(vsubsbs, PPC_ALTIVEC, PPC_NONE, \
 								                bcdtrunc, PPC_NONE, PPC2_ISA300)
 								GEN_VXFORM_DUAL(vsubuqm, PPC2_ALTIVEC_207, PPC_NONE, \
 								                bcdtrunc, PPC_NONE, PPC2_ISA300)
-												ppc: Implement bcdutrunc. instruction

bcdutrunc. Decimal unsigned truncate. Works like bcdtrunc. with
unsigned BCD numbers.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2017-01-12 23:08:33 +03:00
+								GEN_VXFORM_DUAL(vsubcuq, PPC2_ALTIVEC_207, PPC_NONE, \
 								                bcdutrunc, PPC_NONE, PPC2_ISA300)
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								/*
 								 * vsbox: pass pointers to the rD (destination) and rA (source) vector
 								 * registers to the vsbox helper. If AltiVec is not enabled, a
 								 * POWERPC_EXCP_VPU exception is generated instead and nothing else
 								 * is emitted.
 								 */
 								static void gen_vsbox(DisasContext *ctx)
 								{
 								    TCGv_ptr src, dst;
 								    if (unlikely(!ctx->altivec_enabled)) {
 								        gen_exception(ctx, POWERPC_EXCP_VPU);
 								        return;
 								    }
 								    src = gen_avr_ptr(rA(ctx->opcode));
 								    dst = gen_avr_ptr(rD(ctx->opcode));
 								    gen_helper_vsbox(dst, src);
 								    /* Both pointer temps were allocated by gen_avr_ptr(). */
 								    tcg_temp_free_ptr(src);
 								    tcg_temp_free_ptr(dst);
 								}
 								/*
 								 * Cipher VX-forms. vcipher/vcipherlast are generated with the same
 								 * (4, 20) arguments and vncipher/vncipherlast with the same (4, 21)
 								 * arguments; each pair is then combined by GEN_VXFORM_DUAL, with both
 								 * members carrying the PPC2_ALTIVEC_207 flag.
 								 * NOTE(review): presumably the dual handler tells the two members of
 								 * a pair apart by other opcode bits - confirm against the
 								 * GEN_VXFORM_DUAL definition.
 								 */
 								GEN_VXFORM(vcipher, 4, 20)
 								GEN_VXFORM(vcipherlast, 4, 20)
 								GEN_VXFORM(vncipher, 4, 21)
 								GEN_VXFORM(vncipherlast, 4, 21)
 								GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207,
 								                vcipherlast, PPC_NONE, PPC2_ALTIVEC_207)
 								GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207,
 								                vncipherlast, PPC_NONE, PPC2_ALTIVEC_207)
 								/*
 								 * Emit gen_<op>() for a vshasigma-style instruction. The helper is
 								 * given pointers to the rD and rA vector registers plus the raw rB
 								 * opcode field as a constant i32 (rB is not used as a register
 								 * number here). With AltiVec disabled, only a POWERPC_EXCP_VPU
 								 * exception is generated.
 								 */
 								#define VSHASIGMA(op)                                   \
 								static void gen_##op(DisasContext *ctx)                 \
 								{                                                       \
 								    TCGv_ptr src, dst;                                  \
 								    TCGv_i32 imm;                                       \
 								    if (unlikely(!ctx->altivec_enabled)) {              \
 								        gen_exception(ctx, POWERPC_EXCP_VPU);           \
 								        return;                                         \
 								    }                                                   \
 								    src = gen_avr_ptr(rA(ctx->opcode));                 \
 								    dst = gen_avr_ptr(rD(ctx->opcode));                 \
 								    imm = tcg_const_i32(rB(ctx->opcode));               \
 								    gen_helper_##op(dst, src, imm);                     \
 								    tcg_temp_free_ptr(src);                             \
 								    tcg_temp_free_ptr(dst);                             \
 								    tcg_temp_free_i32(imm);                             \
 								}
 								VSHASIGMA(vshasigmaw)
 								VSHASIGMA(vshasigmad)
 								/*
 								 * vpermxor is generated as a three-operand VX-form and then paired
 								 * with vsldoi through GEN_VXFORM_DUAL: vsldoi carries the PPC_ALTIVEC
 								 * flag, vpermxor the PPC2_ALTIVEC_207 flag.
 								 */
 								GEN_VXFORM3(vpermxor, 22, 0xFF)
 								GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE,
 								                vpermxor, PPC_NONE, PPC2_ALTIVEC_207)
-												target/ppc: Move vcfuged to vmx-impl.c.inc

There's no reason to keep vector-impl.c.inc separate from
vmx-impl.c.inc. Additionally, let GVec handle the multiple calls to
helper_cfuged for us.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-2-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:36:55 +03:00
+								static bool trans_VCFUGED(DisasContext *ctx, arg_VX *a)
 								{
 								    static const GVecGen3 g = {
 								        .fni8 = gen_helper_CFUGED,
 								        .vece = MO_64,
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
 								                   avr_full_offset(a->vrb), 16, 16, &g);
 								    return true;
 								}
-												target/ppc: Implement vclzdm/vctzdm instructions

The signature of do_cntzdm is changed to allow reuse as GVecGen3i.fni8.
The method is also moved out of #ifdef TARGET_PPC64, as PowerISA doesn't
say vclzdm and vctzdm are 64-bit only.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-3-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:36:56 +03:00
+								static bool trans_VCLZDM(DisasContext *ctx, arg_VX *a)
 								{
 								    static const GVecGen3i g = {
 								        .fni8 = do_cntzdm,
 								        .vece = MO_64,
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_3i(avr_full_offset(a->vrt), avr_full_offset(a->vra),
 								                    avr_full_offset(a->vrb), 16, 16, false, &g);
 								    return true;
 								}
 								/*
 								 * VCTZDM: same gvec expansion as trans_VCLZDM (do_cntzdm applied to
 								 * the 64-bit elements of VRA/VRB, result in VRT) but with the
 								 * immediate set to true.
 								 * NOTE(review): presumably the immediate selects trailing-zero
 								 * counting inside do_cntzdm - confirm against its definition.
 								 */
 								static bool trans_VCTZDM(DisasContext *ctx, arg_VX *a)
 								{
 								    static const GVecGen3i cntz_op = {
 								        .vece = MO_64,
 								        .fni8 = do_cntzdm,
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_3i(avr_full_offset(a->vrt),
 								                    avr_full_offset(a->vra),
 								                    avr_full_offset(a->vrb),
 								                    16, 16, true, &cntz_op);
 								    return true;
 								}
-												target/ppc: Implement vpdepd/vpextd instruction

pdepd and pextd helpers are moved out of #ifdef (TARGET_PPC64) to allow
them to be reused as GVecGen3.fni8.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20211104123719.323713-4-matheus.ferst@eldorado.org.br>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2021-11-04 15:36:57 +03:00
+								static bool trans_VPDEPD(DisasContext *ctx, arg_VX *a)
 								{
 								    static const GVecGen3 g = {
 								        .fni8 = gen_helper_PDEPD,
 								        .vece = MO_64,
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
 								                   avr_full_offset(a->vrb), 16, 16, &g);
 								    return true;
 								}
 								/*
 								 * VPEXTD: gvec expansion that uses gen_helper_PEXTD as the 64-bit
 								 * per-element generator over VRA and VRB, writing VRT. Gated on the
 								 * ISA310 flags2 bit and on the vector facility.
 								 */
 								static bool trans_VPEXTD(DisasContext *ctx, arg_VX *a)
 								{
 								    static const GVecGen3 pext_op = {
 								        .vece = MO_64,
 								        .fni8 = gen_helper_PEXTD,
 								    };
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_3(avr_full_offset(a->vrt),
 								                   avr_full_offset(a->vra),
 								                   avr_full_offset(a->vrb),
 								                   16, 16, &pext_op);
 								    return true;
 								}
-												target/ppc: Implement vmsumudm instruction

Based on [1] by Lijun Pan <ljp@linux.ibm.com>, which was never merged
into master.

[1]: https://lists.gnu.org/archive/html/qemu-ppc/2020-07/msg00419.html

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Víctor Colombo <victor.colombo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-7-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
+								static bool trans_VMSUMUDM(DisasContext *ctx, arg_VA *a)
 								{
 								    TCGv_i64 rl, rh, src1, src2;
 								    int dw;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
 								    REQUIRE_VECTOR(ctx);
 								    rh = tcg_temp_new_i64();
 								    rl = tcg_temp_new_i64();
 								    src1 = tcg_temp_new_i64();
 								    src2 = tcg_temp_new_i64();
 								    get_avr64(rl, a->rc, false);
 								    get_avr64(rh, a->rc, true);
 								    for (dw = 0; dw < 2; dw++) {
 								        get_avr64(src1, a->vra, dw);
 								        get_avr64(src2, a->vrb, dw);
 								        tcg_gen_mulu2_i64(src1, src2, src1, src2);
 								        tcg_gen_add2_i64(rl, rh, rl, rh, src1, src2);
 								    }
 								    set_avr64(a->vrt, rl, false);
 								    set_avr64(a->vrt, rh, true);
 								    tcg_temp_free_i64(rl);
 								    tcg_temp_free_i64(rh);
 								    tcg_temp_free_i64(src1);
 								    tcg_temp_free_i64(src2);
 								    return true;
 								}
-												target/ppc: Implement vmsumcud instruction

Based on [1] by Lijun Pan <ljp@linux.ibm.com>, which was never merged
into master.

[1]: https://lists.gnu.org/archive/html/qemu-ppc/2020-07/msg00419.html

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Víctor Colombo <victor.colombo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-6-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:37 +03:00
 								/*
 								 * VMSUMCUD: form the two unsigned 64x64 products of the matching
 								 * doublewords of VRA and VRB, add them to RC with explicit carry
 								 * tracking, and write only the final carry word to VRT (the other
 								 * half of VRT is set to zero).
 								 *
 								 * tmp0/tmp1 are reused throughout: in every add2 below tmp1 receives
 								 * the low word of the sum and tmp0 the carry/high word, so the order
 								 * of the statements matters.
 								 * Gated on the ISA310 flags2 bit and on the vector facility.
 								 */
+								static bool trans_VMSUMCUD(DisasContext *ctx, arg_VA *a)
 								{
 								    TCGv_i64 tmp0, tmp1, prod1h, prod1l, prod0h, prod0l, zero;
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tmp0 = tcg_temp_new_i64();
 								    tmp1 = tcg_temp_new_i64();
 								    prod1h = tcg_temp_new_i64();
 								    prod1l = tcg_temp_new_i64();
 								    prod0h = tcg_temp_new_i64();
 								    prod0l = tcg_temp_new_i64();
 								    /* Constant temp: unlike the temps above it is not freed below. */
 								    zero = tcg_constant_i64(0);
 								    /* prod1 = vsr[vra+32].dw[1] * vsr[vrb+32].dw[1] */
 								    get_avr64(tmp0, a->vra, false);
 								    get_avr64(tmp1, a->vrb, false);
 								    tcg_gen_mulu2_i64(prod1l, prod1h, tmp0, tmp1);
 								    /* prod0 = vsr[vra+32].dw[0] * vsr[vrb+32].dw[0] */
 								    get_avr64(tmp0, a->vra, true);
 								    get_avr64(tmp1, a->vrb, true);
 								    tcg_gen_mulu2_i64(prod0l, prod0h, tmp0, tmp1);
 								    /* Sum lower 64-bits elements */
 								    get_avr64(tmp1, a->rc, false);
 								    tcg_gen_add2_i64(tmp1, tmp0, tmp1, zero, prod1l, zero);
 								    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0l, zero);
 								    /*
 								     * Discard lower 64-bits, leaving the carry into bit 64.
 								     * Then sum the higher 64-bit elements.
 								     */
 								    get_avr64(tmp1, a->rc, true);
 								    /* tmp0 (carry from the low sums) is an input here, then reused. */
 								    tcg_gen_add2_i64(tmp1, tmp0, tmp0, zero, tmp1, zero);
 								    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod1h, zero);
 								    tcg_gen_add2_i64(tmp1, tmp0, tmp1, tmp0, prod0h, zero);
 								    /* Discard 64 more bits to complete the CHOP128(temp >> 128) */
 								    set_avr64(a->vrt, tmp0, false);
 								    set_avr64(a->vrt, zero, true);
 								    tcg_temp_free_i64(tmp0);
 								    tcg_temp_free_i64(tmp1);
 								    tcg_temp_free_i64(prod1h);
 								    tcg_temp_free_i64(prod1l);
 								    tcg_temp_free_i64(prod0h);
 								    tcg_temp_free_i64(prod0l);
 								    return true;
 								}
-												target/ppc: moved vector even and odd multiplication to decodetree

Moved the instructions vmulesb, vmulosb, vmuleub, vmuloub,
vmulesh, vmulosh, vmuleuh, vmulouh, vmulesw, vmulosw,
vmuleuw and vmulouw from legacy to decodetree. Implemented
the instructions vmulesd, vmulosd, vmuleud, vmuloud.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:36 +03:00
+								static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
 								                         void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr))
 								{
 								    TCGv_ptr ra, rb, rd;
 								    REQUIRE_VECTOR(ctx);
 								    ra = gen_avr_ptr(a->vra);
 								    rb = gen_avr_ptr(a->vrb);
 								    rd = gen_avr_ptr(a->vrt);
 								    gen_helper(rd, ra, rb);
 								    tcg_temp_free_ptr(ra);
 								    tcg_temp_free_ptr(rb);
 								    tcg_temp_free_ptr(rd);
 								    return true;
 								}
 								/*
 								 * Common translator for the doubleword even/odd multiplies. @even is
 								 * forwarded to get_avr64() to pick which doubleword of VRA and VRB is
 								 * read; @gen_mul produces a two-word result from the two operands,
 								 * and its first and second outputs are stored to VRT with
 								 * set_avr64(..., false) and set_avr64(..., true) respectively.
 								 */
 								static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even,
 								                         void (*gen_mul)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
 								{
 								    TCGv_i64 op_a, op_b, res0, res1;
 								    REQUIRE_VECTOR(ctx);
 								    op_a = tcg_temp_new_i64();
 								    op_b = tcg_temp_new_i64();
 								    res0 = tcg_temp_new_i64();
 								    res1 = tcg_temp_new_i64();
 								    get_avr64(op_a, a->vra, even);
 								    get_avr64(op_b, a->vrb, even);
 								    gen_mul(res0, res1, op_a, op_b);
 								    set_avr64(a->vrt, res0, false);
 								    set_avr64(a->vrt, res1, true);
 								    tcg_temp_free_i64(op_a);
 								    tcg_temp_free_i64(op_b);
 								    tcg_temp_free_i64(res0);
 								    tcg_temp_free_i64(res1);
 								    return true;
 								}
-												target/ppc: Moved vector multiply high and low to decodetree

Moved instructions vmulld, vmulhuw, vmulhsw, vmulhud and vmulhsd to
decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-4-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:36 +03:00
+								static bool trans_VMULLD(DisasContext *ctx, arg_VX *a)
 								{
 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);
 								    tcg_gen_gvec_mul(MO_64, avr_full_offset(a->vrt), avr_full_offset(a->vra),
 								                     avr_full_offset(a->vrb), 16, 16);
 								    return true;
 								}
-												target/ppc: Fix vmul[eo]* instructions marked 2.07

Some ISA v2.03 Vector Multiply instructions marked to be ISA v2.07 only.
This patch fixes it.

Fixes: 80eca687c851 ("target/ppc: moved vector even and odd multiplication to decodetree")
Reported-by: Howard Spoelstra <hsp.cat7@gmail.com>
Suggested-by: Fabiano Rosas <farosas@linux.ibm.com>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220304175156.2012315-2-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-05 09:16:47 +03:00
 								/*
 								 * Byte and halfword even/odd multiplies (signed and unsigned). Each
 								 * is declared with the ALTIVEC flag and routed through do_vx_helper
 								 * with its own gen_helper_* function.
 								 */
+								TRANS_FLAGS(ALTIVEC, VMULESB, do_vx_helper, gen_helper_VMULESB)
 								TRANS_FLAGS(ALTIVEC, VMULOSB, do_vx_helper, gen_helper_VMULOSB)
 								TRANS_FLAGS(ALTIVEC, VMULEUB, do_vx_helper, gen_helper_VMULEUB)
 								TRANS_FLAGS(ALTIVEC, VMULOUB, do_vx_helper, gen_helper_VMULOUB)
 								TRANS_FLAGS(ALTIVEC, VMULESH, do_vx_helper, gen_helper_VMULESH)
 								TRANS_FLAGS(ALTIVEC, VMULOSH, do_vx_helper, gen_helper_VMULOSH)
 								TRANS_FLAGS(ALTIVEC, VMULEUH, do_vx_helper, gen_helper_VMULEUH)
 								TRANS_FLAGS(ALTIVEC, VMULOUH, do_vx_helper, gen_helper_VMULOUH)
-												target/ppc: moved vector even and odd multiplication to decodetree

Moved the instructions vmulesb, vmulosb, vmuleub, vmuloub,
vmulesh, vmulosh, vmuleuh, vmulouh, vmulesw, vmulosw,
muleuw and vmulouw from legacy to decodetree. Implemented
the instructions vmulesd, vmulosd, vmuleud, vmuloud.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:36 +03:00
 								/*
 								 * Word even/odd multiplies are declared with the ALTIVEC_207 flags2
 								 * bit and go through do_vx_helper. The doubleword variants are
 								 * declared with ISA310 and go through do_vx_vmuleo, whose extra
 								 * arguments are the `even` selector (true for the E forms, false for
 								 * the O forms) and the signed (muls2) or unsigned (mulu2) TCG
 								 * multiply generator.
 								 */
+								TRANS_FLAGS2(ALTIVEC_207, VMULESW, do_vx_helper, gen_helper_VMULESW)
 								TRANS_FLAGS2(ALTIVEC_207, VMULOSW, do_vx_helper, gen_helper_VMULOSW)
 								TRANS_FLAGS2(ALTIVEC_207, VMULEUW, do_vx_helper, gen_helper_VMULEUW)
 								TRANS_FLAGS2(ALTIVEC_207, VMULOUW, do_vx_helper, gen_helper_VMULOUW)
 								TRANS_FLAGS2(ISA310, VMULESD, do_vx_vmuleo, true , tcg_gen_muls2_i64)
 								TRANS_FLAGS2(ISA310, VMULOSD, do_vx_vmuleo, false, tcg_gen_muls2_i64)
 								TRANS_FLAGS2(ISA310, VMULEUD, do_vx_vmuleo, true , tcg_gen_mulu2_i64)
 								TRANS_FLAGS2(ISA310, VMULOUD, do_vx_vmuleo, false, tcg_gen_mulu2_i64)
-												target/ppc: vmulh* instructions without helpers

Changed vmulhuw, vmulhud, vmulhsw, vmulhsd to not
use helpers.

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-5-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:36 +03:00
 								/*
 								 * Emit TCG ops for a word-wise "multiply high" on one 64-bit chunk.
 								 *
 								 * a and b each hold two 32-bit words.  For each word position the two
 								 * words are multiplied as 64-bit values and the upper 32 bits of that
 								 * product are kept; both results are packed into t.
 								 *
 								 * sign selects signed handling (sign-extension / arithmetic shift) or
 								 * unsigned handling (zero-extension / logical shift) of the operands.
 								 */
+								static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
 								{
 								    TCGv_i64 hh, lh, temp;
 								    hh = tcg_temp_new_i64();
 								    lh = tcg_temp_new_i64();
 								    temp = tcg_temp_new_i64();
 								    /* Low words: widen bits 0..31 of each operand to 64 bits. */
 								    if (sign) {
 								        tcg_gen_ext32s_i64(lh, a);
 								        tcg_gen_ext32s_i64(temp, b);
 								    } else {
-												target/ppc: use ext32u and deposit in do_vx_vmulhw_i64

Fixes: 29e9dfcf755e ("target/ppc: vmulh* instructions without helpers")
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220304175156.2012315-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-05 09:16:47 +03:00
+								        tcg_gen_ext32u_i64(lh, a);
 								        tcg_gen_ext32u_i64(temp, b);
-												target/ppc: vmulh* instructions without helpers

Changed vmulhuw, vmulhud, vmulhsw, vmulhsd to not
use helpers.

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-5-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:36 +03:00
+								    }
 								    /* lh = full 64-bit product of the two low words. */
 								    tcg_gen_mul_i64(lh, lh, temp);
 								    /* High words: bring bits 32..63 of each operand down to 0..31. */
 								    if (sign) {
 								        tcg_gen_sari_i64(hh, a, 32);
 								        tcg_gen_sari_i64(temp, b, 32);
 								    } else {
 								        tcg_gen_shri_i64(hh, a, 32);
 								        tcg_gen_shri_i64(temp, b, 32);
 								    }
 								    /* hh = product of the high words; its upper half is already in place. */
 								    tcg_gen_mul_i64(hh, hh, temp);
 								    /* Move the upper half of the low-word product down to bits 0..31. */
 								    tcg_gen_shri_i64(lh, lh, 32);
-												target/ppc: use ext32u and deposit in do_vx_vmulhw_i64

Fixes: 29e9dfcf755e ("target/ppc: vmulh* instructions without helpers")
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220304175156.2012315-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-05 09:16:47 +03:00
 								    /* t = hh with its low 32 bits replaced by the low 32 bits of lh. */
+								    tcg_gen_deposit_i64(t, hh, lh, 0, 32);
-												target/ppc: vmulh* instructions without helpers

Changed vmulhuw, vmulhud, vmulhsw, vmulhsd to not
use helpers.

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220225210936.1749575-5-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:36 +03:00
 								    tcg_temp_free_i64(hh);
 								    tcg_temp_free_i64(lh);
 								    tcg_temp_free_i64(temp);
 								}
 								/*
 								 * Emit TCG ops for a doubleword "multiply high": t receives the upper
 								 * 64 bits of the 128-bit product a * b.  sign picks the signed (muls2)
 								 * or unsigned (mulu2) double-width multiply.
 								 */
 								static void do_vx_vmulhd_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
 								{
 								    /* The mul2 ops also produce the low half; it is discarded. */
 								    TCGv_i64 unused_lo = tcg_temp_new_i64();

 								    if (!sign) {
 								        tcg_gen_mulu2_i64(unused_lo, t, a, b);
 								    } else {
 								        tcg_gen_muls2_i64(unused_lo, t, a, b);
 								    }

 								    tcg_temp_free_i64(unused_lo);
 								}
 								/*
 								 * Shared translator for the vector multiply-high instructions.
 								 *
 								 * The 128-bit vector registers are processed as two 64-bit chunks.  For
 								 * each chunk, VRA and VRB are loaded, func emits the ops that produce
 								 * the result chunk, and that chunk is stored to VRT.
 								 *
 								 * ctx:  translation context
 								 * a:    decoded VX-form fields (vra, vrb, vrt register numbers)
 								 * sign: forwarded unchanged to func
 								 * func: per-chunk generator, called as func(result, a_chunk, b_chunk, sign)
 								 *
 								 * Returns true after emitting the ops.  The REQUIRE_* macros presumably
 								 * return early when ISA310 or the vector facility is not available.
 								 */
 								static bool do_vx_mulh(DisasContext *ctx, arg_VX *a, bool sign,
 								                       void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, bool))
 								{
 								    TCGv_i64 src_a, src_b, dst;
 								    int chunk;

 								    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
 								    REQUIRE_VECTOR(ctx);

 								    src_a = tcg_temp_new_i64();
 								    src_b = tcg_temp_new_i64();
 								    dst = tcg_temp_new_i64();

 								    for (chunk = 0; chunk < 2; chunk++) {
 								        get_avr64(src_a, a->vra, chunk);
 								        get_avr64(src_b, a->vrb, chunk);
 								        /*
 								         * NOTE(review): this load of VRT looks redundant, since the
 								         * do_vx_vmulh{w,d}_i64 callbacks overwrite their first argument
 								         * without reading it.  Kept in case another func relies on it.
 								         */
 								        get_avr64(dst, a->vrt, chunk);

 								        func(dst, src_a, src_b, sign);

 								        set_avr64(a->vrt, dst, chunk);
 								    }

 								    tcg_temp_free_i64(src_a);
 								    tcg_temp_free_i64(src_b);
 								    tcg_temp_free_i64(dst);

 								    return true;
 								}
 								/*
 								 * Vector Multiply High translators, all built on do_vx_mulh.  The
 								 * boolean is do_vx_mulh's "sign" argument (true for the signed S forms,
 								 * false for the unsigned U forms); the last argument is the per-chunk
 								 * generator: do_vx_vmulhw_i64 for word elements, do_vx_vmulhd_i64 for
 								 * doubleword elements.
 								 */
 								TRANS(VMULHSW, do_vx_mulh, true , do_vx_vmulhw_i64)
 								TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
 								TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
 								TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
-												target/ppc: Moved vector multiply high and low to decodetree

Moved instructions vmulld, vmulhuw, vmulhsw, vmulhud and vmulhsd to
decodetree

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-4-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>

											
										
										
											2022-03-02 08:51:36 +03:00
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
 								/* Undefine the GEN_* generator macros (e.g. GEN_VR_LDX, defined earlier in this file). */
+								#undef GEN_VR_LDX
 								#undef GEN_VR_STX
 								#undef GEN_VR_LVE
 								#undef GEN_VR_STVE
 								#undef GEN_VX_LOGICAL
 								#undef GEN_VX_LOGICAL_207
 								#undef GEN_VXFORM
 								#undef GEN_VXFORM_207
 								#undef GEN_VXFORM_DUAL
 								#undef GEN_VXRFORM_DUAL
 								#undef GEN_VXRFORM1
 								#undef GEN_VXRFORM
-												target/ppc: Use tcg_gen_gvec_dup_imm

We can now unify the implementation of the 3 VSPLTI instructions.

Acked-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

											
										
										
											2020-03-29 00:58:36 +03:00
+								#undef GEN_VXFORM_VSPLTI
-												ppc: Move VMX ops out of translate.c

Makes things a bit more manageable

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-07-27 09:56:23 +03:00
+								#undef GEN_VXFORM_NOA
 								#undef GEN_VXFORM_UIMM
 								#undef GEN_VAFORM_PAIRED
-												target-ppc: Implement bcdcfn. instruction

bcdcfn. converts from National numeric format to BCD. National format
uses a byte to represent a digit where the most significant nibble is
always 0x3 and the least significant nibble is the digit itself.

Signed-off-by: Jose Ricardo Ziviani <joserz@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>

											
										
										
											2016-11-08 19:50:22 +03:00
 								#undef GEN_BCD2