2016-07-27 09:56:23 +03:00
|
|
|
|
/*
|
|
|
|
|
* translate/vmx-impl.c
|
|
|
|
|
*
|
|
|
|
|
* Altivec/VMX translation
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*** Altivec vector extension ***/
|
|
|
|
|
/* Altivec registers moves */
|
|
|
|
|
|
|
|
|
|
static inline TCGv_ptr gen_avr_ptr(int reg)
|
|
|
|
|
{
|
|
|
|
|
TCGv_ptr r = tcg_temp_new_ptr();
|
2019-03-07 21:05:17 +03:00
|
|
|
|
tcg_gen_addi_ptr(r, cpu_env, avr_full_offset(reg));
|
2016-07-27 09:56:23 +03:00
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * GEN_VR_LDX(name, opc2, opc3): define gen_<name>(), the translator for a
 * 16-byte vector load into VR[rD].
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise the effective address is masked with ~0xf and two 64-bit loads
 * are emitted, at EA and at EA + 8.  gen_qemu_ld64_i64 already does the
 * byteswap inside each doubleword, so little-endian mode only changes
 * which half of the register each doubleword is written to.
 *
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VR_LDX(name, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    TCGv_i64 dword; \
    TCGv addr; \
    bool le; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    gen_set_access_type(ctx, ACCESS_INT); \
    dword = tcg_temp_new_i64(); \
    addr = tcg_temp_new(); \
    gen_addr_reg_index(ctx, addr); \
    tcg_gen_andi_tl(addr, addr, ~0xf); \
    le = ctx->le_mode; \
    /* Doubleword at EA: high half in BE mode, low half in LE mode. */ \
    gen_qemu_ld64_i64(ctx, dword, addr); \
    set_avr64(rD(ctx->opcode), dword, !le); \
    /* Doubleword at EA + 8 fills the remaining half. */ \
    tcg_gen_addi_tl(addr, addr, 8); \
    gen_qemu_ld64_i64(ctx, dword, addr); \
    set_avr64(rD(ctx->opcode), dword, le); \
}
|
|
|
|
|
|
|
|
|
|
/*
 * GEN_VR_STX(name, opc2, opc3): define gen_st<name>(), the translator for
 * a 16-byte vector store of VR[rD].
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise the effective address is masked with ~0xf and two 64-bit
 * stores are emitted, at EA and at EA + 8.  gen_qemu_st64_i64 already does
 * the byteswap inside each doubleword, so little-endian mode only changes
 * which half of the register is stored first.
 *
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VR_STX(name, opc2, opc3) \
static void gen_st##name(DisasContext *ctx) \
{ \
    TCGv_i64 dword; \
    TCGv addr; \
    bool le; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    gen_set_access_type(ctx, ACCESS_INT); \
    dword = tcg_temp_new_i64(); \
    addr = tcg_temp_new(); \
    gen_addr_reg_index(ctx, addr); \
    tcg_gen_andi_tl(addr, addr, ~0xf); \
    le = ctx->le_mode; \
    /* Stored at EA: high half in BE mode, low half in LE mode. */ \
    get_avr64(dword, rD(ctx->opcode), !le); \
    gen_qemu_st64_i64(ctx, dword, addr); \
    /* The remaining half is stored at EA + 8. */ \
    tcg_gen_addi_tl(addr, addr, 8); \
    get_avr64(dword, rD(ctx->opcode), le); \
    gen_qemu_st64_i64(ctx, dword, addr); \
}
|
|
|
|
|
|
|
|
|
|
/*
 * GEN_VR_LVE(name, opc2, opc3, size): define gen_lve<name>(), the
 * translator for an element load handled by gen_helper_lve<name>.
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise it computes the effective address, clears its low bits so it
 * is a multiple of @size (nothing to do when @size is 1), and calls the
 * helper with a pointer to the vector register selected by the rS field.
 *
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VR_LVE(name, opc2, opc3, size) \
static void gen_lve##name(DisasContext *ctx) \
{ \
    TCGv_ptr vreg; \
    TCGv addr; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    gen_set_access_type(ctx, ACCESS_INT); \
    addr = tcg_temp_new(); \
    gen_addr_reg_index(ctx, addr); \
    if ((size) > 1) { \
        tcg_gen_andi_tl(addr, addr, ~((size) - 1)); \
    } \
    vreg = gen_avr_ptr(rS(ctx->opcode)); \
    gen_helper_lve##name(cpu_env, vreg, addr); \
}
|
|
|
|
|
|
|
|
|
|
/*
 * GEN_VR_STVE(name, opc2, opc3, size): define gen_stve<name>(), the
 * translator for an element store handled by gen_helper_stve<name>.
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise it computes the effective address, clears its low bits so it
 * is a multiple of @size (nothing to do when @size is 1), and calls the
 * helper with a pointer to the vector register selected by the rS field.
 *
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VR_STVE(name, opc2, opc3, size) \
static void gen_stve##name(DisasContext *ctx) \
{ \
    TCGv_ptr vreg; \
    TCGv addr; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    gen_set_access_type(ctx, ACCESS_INT); \
    addr = tcg_temp_new(); \
    gen_addr_reg_index(ctx, addr); \
    if ((size) > 1) { \
        tcg_gen_andi_tl(addr, addr, ~((size) - 1)); \
    } \
    vreg = gen_avr_ptr(rS(ctx->opcode)); \
    gen_helper_stve##name(cpu_env, vreg, addr); \
}
|
|
|
|
|
|
|
|
|
|
/* Full 16-byte vector loads: defines gen_lvx and gen_lvxl. */
GEN_VR_LDX(lvx, 0x07, 0x03);
/* The cache is not emulated, so lvxl is handled exactly like lvx. */
GEN_VR_LDX(lvxl, 0x07, 0x0B);
|
|
|
|
|
|
|
|
|
|
/* Element loads with 1-, 2- and 4-byte address alignment. */
GEN_VR_LVE(bx, 0x07, 0x00, 1);
GEN_VR_LVE(hx, 0x07, 0x01, 2);
GEN_VR_LVE(wx, 0x07, 0x02, 4);
|
|
|
|
|
|
|
|
|
|
/*
 * Full 16-byte vector stores.  GEN_VR_STX prepends "gen_st" to its name
 * argument, so these define gen_stsvx and gen_stsvxl.
 */
GEN_VR_STX(svx, 0x07, 0x07);
/* The cache is not emulated, so stvxl is handled exactly like stvx. */
GEN_VR_STX(svxl, 0x07, 0x0F);
|
|
|
|
|
|
|
|
|
|
/* Element stores with 1-, 2- and 4-byte address alignment. */
GEN_VR_STVE(bx, 0x07, 0x04, 1);
GEN_VR_STVE(hx, 0x07, 0x05, 2);
GEN_VR_STVE(wx, 0x07, 0x06, 4);
|
|
|
|
|
|
|
|
|
|
/*
 * mfvscr: write the value produced by gen_helper_mfvscr into VR[rD].
 *
 * Raises POWERPC_EXCP_VPU when Altivec is disabled.  Otherwise the high
 * doubleword of the destination is cleared and the low doubleword receives
 * the helper's 32-bit result zero-extended to 64 bits.
 */
static void gen_mfvscr(DisasContext *ctx)
{
    TCGv_i64 dword;
    TCGv_i32 vscr;
    int vrt;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    vrt = rD(ctx->opcode);

    /* High doubleword is always zero. */
    dword = tcg_temp_new_i64();
    tcg_gen_movi_i64(dword, 0);
    set_avr64(vrt, dword, true);

    /* Low doubleword: helper result, zero-extended. */
    vscr = tcg_temp_new_i32();
    gen_helper_mfvscr(vscr, cpu_env);
    tcg_gen_extu_i32_i64(dword, vscr);
    set_avr64(vrt, dword, false);
}
|
|
|
|
|
|
|
|
|
|
/*
 * mtvscr: load one 32-bit word of VR[rB] from the CPU state and pass it
 * to gen_helper_mtvscr.
 *
 * Raises POWERPC_EXCP_VPU when Altivec is disabled.  The word is read at
 * the start of the register on little-endian hosts and 12 bytes further on
 * big-endian hosts.
 * NOTE(review): presumably this selects the least significant word of the
 * register on either host - confirm against the AVR storage layout.
 */
static void gen_mtvscr(DisasContext *ctx)
{
    TCGv_i32 word;
    int offset;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    word = tcg_temp_new_i32();

    offset = avr_full_offset(rB(ctx->opcode));
#if HOST_BIG_ENDIAN
    /* Skip the first three 32-bit words of the register. */
    offset += 3 * 4;
#endif

    tcg_gen_ld_i32(word, cpu_env, offset);
    gen_helper_mtvscr(cpu_env, word);
}
|
|
|
|
|
|
2023-02-26 07:01:07 +03:00
|
|
|
|
/*
 * Common translator for the vmul10* family: multiply the 128-bit value in
 * VR[rA] by 10 using 64-bit pieces.
 *
 * @add_cin:   when true, the low 4 bits of the low doubleword of VR[rB]
 *             are added to the product as a carry-in.
 * @ret_carry: when true, VR[rD] receives the part of the result that does
 *             not fit in 128 bits (low doubleword = carry-out, high
 *             doubleword = 0); when false, VR[rD] receives the low 128
 *             bits of the result.
 *
 * Raises POWERPC_EXCP_VPU when Altivec is disabled.
 */
static void gen_vx_vmul10(DisasContext *ctx, bool add_cin, bool ret_carry)
{
    TCGv_i64 lo, hi, carry, dword;
    TCGv_i64 ten, zero;
    int vra, vrb, vrt;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    vra = rA(ctx->opcode);
    vrb = rB(ctx->opcode);
    vrt = rD(ctx->opcode);

    lo = tcg_temp_new_i64();
    hi = tcg_temp_new_i64();
    carry = tcg_temp_new_i64();
    dword = tcg_temp_new_i64();
    ten = tcg_constant_i64(10);
    zero = tcg_constant_i64(0);

    /*
     * Low doubleword: dword = low 64 bits of (vA.lo * 10 [+ cin]),
     * carry = the bits that overflow into the high doubleword.
     */
    get_avr64(dword, vra, false);
    if (add_cin) {
        tcg_gen_mulu2_i64(lo, hi, dword, ten);
        get_avr64(dword, vrb, false);
        tcg_gen_andi_i64(carry, dword, 0xF);
        tcg_gen_add2_i64(dword, carry, lo, hi, carry, zero);
    } else {
        tcg_gen_mulu2_i64(dword, carry, dword, ten);
    }
    set_avr64(vrt, dword, false);

    /* High doubleword: vA.hi * 10 + carry from the low doubleword. */
    get_avr64(dword, vra, true);
    if (ret_carry) {
        /* Keep only what overflows out of bit 127. */
        tcg_gen_mulu2_i64(lo, hi, dword, ten);
        tcg_gen_add2_i64(lo, dword, lo, hi, carry, zero);
        set_avr64(vrt, dword, false);
        set_avr64(vrt, zero, true);
    } else {
        tcg_gen_mul_i64(lo, dword, ten);
        tcg_gen_add_i64(dword, lo, carry);
        set_avr64(vrt, dword, true);
    }
}
|
|
|
|
|
|
2016-10-24 12:14:58 +03:00
|
|
|
|
/*
 * GEN_VX_VMUL10(name, add_cin, ret_carry): define gen_<name>() as a thin
 * wrapper that calls gen_vx_vmul10() with the given fixed flags.
 */
#define GEN_VX_VMUL10(name, add_cin, ret_carry) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    gen_vx_vmul10(ctx, add_cin, ret_carry); \
}
|
2016-10-24 12:14:58 +03:00
|
|
|
|
|
|
|
|
|
/* Arguments after the name are (add_cin, ret_carry). */
GEN_VX_VMUL10(vmul10uq, 0, 0);   /* product, no carry-in */
GEN_VX_VMUL10(vmul10euq, 1, 0);  /* product, with carry-in */
GEN_VX_VMUL10(vmul10cuq, 0, 1);  /* carry-out, no carry-in */
GEN_VX_VMUL10(vmul10ecuq, 1, 1); /* carry-out, with carry-in */
|
|
|
|
|
|
2019-02-15 13:00:42 +03:00
|
|
|
|
/*
 * GEN_VXFORM_V(name, vece, tcg_op, opc2, opc3): define gen_<name>() for a
 * three-operand vector instruction expanded through @tcg_op.
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise it calls @tcg_op with element size @vece, the CPU-state
 * offsets of VR[rD], VR[rA] and VR[rB], and 16 for both trailing size
 * arguments.
 *
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VXFORM_V(name, vece, tcg_op, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    int dofs, aofs, bofs; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
 \
    dofs = avr_full_offset(rD(ctx->opcode)); \
    aofs = avr_full_offset(rA(ctx->opcode)); \
    bofs = avr_full_offset(rB(ctx->opcode)); \
    tcg_op(vece, dofs, aofs, bofs, 16, 16); \
}
|
|
|
|
|
|
2019-02-15 13:00:42 +03:00
|
|
|
|
/* Logical operations */
|
|
|
|
|
GEN_VXFORM_V(vand, MO_64, tcg_gen_gvec_and, 2, 16);
|
|
|
|
|
GEN_VXFORM_V(vandc, MO_64, tcg_gen_gvec_andc, 2, 17);
|
|
|
|
|
GEN_VXFORM_V(vor, MO_64, tcg_gen_gvec_or, 2, 18);
|
|
|
|
|
GEN_VXFORM_V(vxor, MO_64, tcg_gen_gvec_xor, 2, 19);
|
|
|
|
|
GEN_VXFORM_V(vnor, MO_64, tcg_gen_gvec_nor, 2, 20);
|
|
|
|
|
GEN_VXFORM_V(veqv, MO_64, tcg_gen_gvec_eqv, 2, 26);
|
|
|
|
|
GEN_VXFORM_V(vnand, MO_64, tcg_gen_gvec_nand, 2, 22);
|
|
|
|
|
GEN_VXFORM_V(vorc, MO_64, tcg_gen_gvec_orc, 2, 21);
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
|
|
|
|
/*
 * GEN_VXFORM(name, opc2, opc3): define gen_<name>() for a three-operand
 * vector instruction implemented by gen_helper_<name>(rd, ra, rb).
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise it passes pointers to VR[rD], VR[rA] and VR[rB] to the helper.
 *
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VXFORM(name, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    TCGv_ptr vra, vrb, vrt; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    vra = gen_avr_ptr(rA(ctx->opcode)); \
    vrb = gen_avr_ptr(rB(ctx->opcode)); \
    vrt = gen_avr_ptr(rD(ctx->opcode)); \
    gen_helper_##name(vrt, vra, vrb); \
}
|
|
|
|
|
|
2019-07-15 17:22:47 +03:00
|
|
|
|
/*
 * GEN_VXFORM_TRANS(name, opc2, opc3): define gen_<name>() as a wrapper
 * around the direct translator trans_<name>().
 *
 * The wrapper raises POWERPC_EXCP_VPU when Altivec is disabled and calls
 * trans_<name>(ctx) otherwise.  opc2 and opc3 are not used by the
 * expansion.
 */
#define GEN_VXFORM_TRANS(name, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
    } else { \
        trans_##name(ctx); \
    } \
}
|
|
|
|
|
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/*
 * GEN_VXFORM_ENV(name, opc2, opc3): like GEN_VXFORM, but the helper also
 * receives cpu_env as its first argument:
 * gen_helper_<name>(cpu_env, rd, ra, rb).
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VXFORM_ENV(name, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    TCGv_ptr vra, vrb, vrt; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    vra = gen_avr_ptr(rA(ctx->opcode)); \
    vrb = gen_avr_ptr(rB(ctx->opcode)); \
    vrt = gen_avr_ptr(rD(ctx->opcode)); \
    gen_helper_##name(cpu_env, vrt, vra, vrb); \
}
|
|
|
|
|
|
|
|
|
|
/*
 * GEN_VXFORM3(name, opc2, opc3): define gen_<name>() for a four-operand
 * vector instruction implemented by gen_helper_<name>(rd, ra, rb, rc).
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * Otherwise it passes pointers to VR[rD], VR[rA], VR[rB] and VR[rC] to the
 * helper.  opc2 and opc3 are not used by the expansion.
 */
#define GEN_VXFORM3(name, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    TCGv_ptr vra, vrb, vrc, vrt; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    vra = gen_avr_ptr(rA(ctx->opcode)); \
    vrb = gen_avr_ptr(rB(ctx->opcode)); \
    vrc = gen_avr_ptr(rC(ctx->opcode)); \
    vrt = gen_avr_ptr(rD(ctx->opcode)); \
    gen_helper_##name(vrt, vra, vrb, vrc); \
}
|
|
|
|
|
|
|
|
|
|
/*
 * Altivec instruction pairs that share an encoding and are told apart by
 * bit 31 (Rc).  The two instructions of a pair generally come from
 * different ISA versions, so each one carries its own pair of feature
 * flags (one tested against insns_flags, one against insns_flags2).
 *
 * The generated gen_<name0>_<name1>() dispatches to gen_<name0>() when
 * Rc is 0 and to gen_<name1>() when Rc is 1, provided the CPU has at least
 * one of the flags of the selected instruction; otherwise it raises an
 * invalid-instruction exception.
 */
#define GEN_VXFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1) \
static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
{ \
    const bool have0 = (ctx->insns_flags & (flg0)) || \
                       (ctx->insns_flags2 & (flg2_0)); \
    const bool have1 = (ctx->insns_flags & (flg1)) || \
                       (ctx->insns_flags2 & (flg2_1)); \
 \
    if (Rc(ctx->opcode) == 0 && have0) { \
        gen_##name0(ctx); \
        return; \
    } \
    if (Rc(ctx->opcode) == 1 && have1) { \
        gen_##name1(ctx); \
        return; \
    } \
    gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \
}
|
|
|
|
|
|
2019-08-27 12:37:43 +03:00
|
|
|
|
/*
 * Variant of GEN_VXFORM_DUAL for pairs where the first instruction is
 * implemented by direct translation (trans_<name0>) and the second one by
 * a regular gen_<name1>() function.
 *
 * trans_<name0>() is called directly, so the Altivec-enabled check is done
 * here before calling it.
 */
#define GEN_VXFORM_TRANS_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1) \
static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
{ \
    const bool have0 = (ctx->insns_flags & (flg0)) || \
                       (ctx->insns_flags2 & (flg2_0)); \
    const bool have1 = (ctx->insns_flags & (flg1)) || \
                       (ctx->insns_flags2 & (flg2_1)); \
 \
    if (Rc(ctx->opcode) == 0 && have0) { \
        if (unlikely(!ctx->altivec_enabled)) { \
            gen_exception(ctx, POWERPC_EXCP_VPU); \
        } else { \
            trans_##name0(ctx); \
        } \
        return; \
    } \
    if (Rc(ctx->opcode) == 1 && have1) { \
        gen_##name1(ctx); \
        return; \
    } \
    gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \
}
|
|
|
|
|
|
2016-10-24 12:14:58 +03:00
|
|
|
|
/*
 * Variant of GEN_VXFORM_DUAL that additionally takes an invalid-bits mask
 * per instruction: an instruction is only selected when none of the opcode
 * bits in its mask (inval0 / inval1) are set.  Anything not selected
 * raises an invalid-instruction exception.
 */
#define GEN_VXFORM_DUAL_EXT(name0, flg0, flg2_0, inval0, \
                            name1, flg1, flg2_1, inval1) \
static void glue(gen_, name0##_##name1)(DisasContext *ctx) \
{ \
    const bool ok0 = ((ctx->insns_flags & (flg0)) || \
                      (ctx->insns_flags2 & (flg2_0))) && \
                     !(ctx->opcode & (inval0)); \
    const bool ok1 = ((ctx->insns_flags & (flg1)) || \
                      (ctx->insns_flags2 & (flg2_1))) && \
                     !(ctx->opcode & (inval1)); \
 \
    if (Rc(ctx->opcode) == 0 && ok0) { \
        gen_##name0(ctx); \
        return; \
    } \
    if (Rc(ctx->opcode) == 1 && ok1) { \
        gen_##name1(ctx); \
        return; \
    } \
    gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL); \
}
|
|
|
|
|
|
2016-11-28 10:56:42 +03:00
|
|
|
|
/*
 * GEN_VXFORM_HETRO(name, opc2, opc3): define gen_<name>() for an
 * instruction with mixed operand kinds: the destination (rD) and first
 * source (rA) are general-purpose registers, the second source (rB) is a
 * vector register passed to the helper by pointer.
 *
 * The generated function raises POWERPC_EXCP_VPU when Altivec is disabled.
 * opc2 and opc3 are not used by the expansion.
 */
#define GEN_VXFORM_HETRO(name, opc2, opc3) \
static void glue(gen_, name)(DisasContext *ctx) \
{ \
    TCGv_ptr vrb; \
 \
    if (unlikely(!ctx->altivec_enabled)) { \
        gen_exception(ctx, POWERPC_EXCP_VPU); \
        return; \
    } \
    vrb = gen_avr_ptr(rB(ctx->opcode)); \
    gen_helper_##name(cpu_gpr[rD(ctx->opcode)], \
                      cpu_gpr[rA(ctx->opcode)], vrb); \
}
|
|
|
|
|
|
2019-02-15 13:00:43 +03:00
|
|
|
|
GEN_VXFORM_V(vaddubm, MO_8, tcg_gen_gvec_add, 0, 0);
|
2016-10-24 12:14:58 +03:00
|
|
|
|
GEN_VXFORM_DUAL_EXT(vaddubm, PPC_ALTIVEC, PPC_NONE, 0, \
|
|
|
|
|
vmul10cuq, PPC_NONE, PPC2_ISA300, 0x0000F800)
|
2019-02-15 13:00:43 +03:00
|
|
|
|
GEN_VXFORM_V(vadduhm, MO_16, tcg_gen_gvec_add, 0, 1);
|
2016-10-24 12:14:58 +03:00
|
|
|
|
GEN_VXFORM_DUAL(vadduhm, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
vmul10ecuq, PPC_NONE, PPC2_ISA300)
|
2019-02-15 13:00:43 +03:00
|
|
|
|
GEN_VXFORM_V(vadduwm, MO_32, tcg_gen_gvec_add, 0, 2);
|
|
|
|
|
GEN_VXFORM_V(vaddudm, MO_64, tcg_gen_gvec_add, 0, 3);
|
|
|
|
|
GEN_VXFORM_V(vsububm, MO_8, tcg_gen_gvec_sub, 0, 16);
|
|
|
|
|
GEN_VXFORM_V(vsubuhm, MO_16, tcg_gen_gvec_sub, 0, 17);
|
|
|
|
|
GEN_VXFORM_V(vsubuwm, MO_32, tcg_gen_gvec_sub, 0, 18);
|
|
|
|
|
GEN_VXFORM_V(vsubudm, MO_64, tcg_gen_gvec_sub, 0, 19);
|
2019-02-15 13:00:58 +03:00
|
|
|
|
/* Unsigned and signed maximum, per element size. */
GEN_VXFORM_V(vmaxub, MO_8, tcg_gen_gvec_umax, 1, 0);
GEN_VXFORM_V(vmaxuh, MO_16, tcg_gen_gvec_umax, 1, 1);
GEN_VXFORM_V(vmaxuw, MO_32, tcg_gen_gvec_umax, 1, 2);
GEN_VXFORM_V(vmaxud, MO_64, tcg_gen_gvec_umax, 1, 3);
GEN_VXFORM_V(vmaxsb, MO_8, tcg_gen_gvec_smax, 1, 4);
GEN_VXFORM_V(vmaxsh, MO_16, tcg_gen_gvec_smax, 1, 5);
GEN_VXFORM_V(vmaxsw, MO_32, tcg_gen_gvec_smax, 1, 6);
GEN_VXFORM_V(vmaxsd, MO_64, tcg_gen_gvec_smax, 1, 7);
/* Unsigned and signed minimum, per element size. */
GEN_VXFORM_V(vminub, MO_8, tcg_gen_gvec_umin, 1, 8);
GEN_VXFORM_V(vminuh, MO_16, tcg_gen_gvec_umin, 1, 9);
GEN_VXFORM_V(vminuw, MO_32, tcg_gen_gvec_umin, 1, 10);
GEN_VXFORM_V(vminud, MO_64, tcg_gen_gvec_umin, 1, 11);
GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12);
GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13);
GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14);
GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15);
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/* Merge high / merge low instructions, implemented by helpers. */
GEN_VXFORM(vmrghb, 6, 0);
GEN_VXFORM(vmrghh, 6, 1);
GEN_VXFORM(vmrghw, 6, 2);
GEN_VXFORM(vmrglb, 6, 4);
GEN_VXFORM(vmrglh, 6, 5);
GEN_VXFORM(vmrglw, 6, 6);
|
|
|
|
|
|
2019-08-27 12:37:43 +03:00
|
|
|
|
/*
 * vmrgew VRT,VRA,VRB - direct translation.
 *
 * For each doubleword of the result, the upper 32 bits come from the upper
 * word of the corresponding vA doubleword and the lower 32 bits come from
 * the upper word of the corresponding vB doubleword.
 */
static void trans_vmrgew(DisasContext *ctx)
{
    int vrt = rD(ctx->opcode);
    int vra = rA(ctx->opcode);
    int vrb = rB(ctx->opcode);
    TCGv_i64 b_word = tcg_temp_new_i64();
    TCGv_i64 merged = tcg_temp_new_i64();
    int pass;

    /* Same sequence for both halves: high doubleword first, then low. */
    for (pass = 0; pass < 2; pass++) {
        bool high = (pass == 0);

        /* Bring the upper word of vB down to bits 31:0. */
        get_avr64(merged, vrb, high);
        tcg_gen_shri_i64(b_word, merged, 32);
        /* Keep the upper word of vA and replace its lower word. */
        get_avr64(merged, vra, high);
        tcg_gen_deposit_i64(merged, merged, b_word, 0, 32);
        set_avr64(vrt, merged, high);
    }
}
|
|
|
|
|
|
2019-08-27 12:37:43 +03:00
|
|
|
|
/*
 * vmrgow VRT,VRA,VRB - direct translation.
 *
 * For each doubleword of the result, the upper 32 bits come from the lower
 * word of the corresponding vA doubleword and the lower 32 bits come from
 * the lower word of the corresponding vB doubleword.
 */
static void trans_vmrgow(DisasContext *ctx)
{
    int vrt = rD(ctx->opcode);
    int vra = rA(ctx->opcode);
    int vrb = rB(ctx->opcode);
    TCGv_i64 b_dword = tcg_temp_new_i64();
    TCGv_i64 a_dword = tcg_temp_new_i64();
    TCGv_i64 merged = tcg_temp_new_i64();
    int pass;

    /* Same sequence for both halves: high doubleword first, then low. */
    for (pass = 0; pass < 2; pass++) {
        bool high = (pass == 0);

        get_avr64(b_dword, vrb, high);
        get_avr64(a_dword, vra, high);
        /* Start from vB and put the lower word of vA in bits 63:32. */
        tcg_gen_deposit_i64(merged, b_dword, a_dword, 32, 32);
        set_avr64(vrt, merged, high);
    }
}
|
|
|
|
|
|
2019-07-15 17:22:47 +03:00
|
|
|
|
/*
|
|
|
|
|
* lvsl VRT,RA,RB - Load Vector for Shift Left
|
|
|
|
|
*
|
|
|
|
|
* Let the EA be the sum (rA|0)+(rB). Let sh=EA[28–31].
|
|
|
|
|
* Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
|
|
|
|
|
* Bytes sh:sh+15 of X are placed into vD.
|
|
|
|
|
*/
|
|
|
|
|
static void trans_lvsl(DisasContext *ctx)
|
|
|
|
|
{
|
|
|
|
|
int VT = rD(ctx->opcode);
|
|
|
|
|
TCGv_i64 result = tcg_temp_new_i64();
|
|
|
|
|
TCGv_i64 sh = tcg_temp_new_i64();
|
|
|
|
|
TCGv EA = tcg_temp_new();
|
|
|
|
|
|
|
|
|
|
/* Get sh(from description) by anding EA with 0xf. */
|
|
|
|
|
gen_addr_reg_index(ctx, EA);
|
|
|
|
|
tcg_gen_extu_tl_i64(sh, EA);
|
|
|
|
|
tcg_gen_andi_i64(sh, sh, 0xfULL);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Create bytes sh:sh+7 of X(from description) and place them in
|
|
|
|
|
* higher doubleword of vD.
|
|
|
|
|
*/
|
|
|
|
|
tcg_gen_muli_i64(sh, sh, 0x0101010101010101ULL);
|
|
|
|
|
tcg_gen_addi_i64(result, sh, 0x0001020304050607ull);
|
|
|
|
|
set_avr64(VT, result, true);
|
|
|
|
|
/*
|
|
|
|
|
* Create bytes sh+8:sh+15 of X(from description) and place them in
|
|
|
|
|
* lower doubleword of vD.
|
|
|
|
|
*/
|
|
|
|
|
tcg_gen_addi_i64(result, sh, 0x08090a0b0c0d0e0fULL);
|
|
|
|
|
set_avr64(VT, result, false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* lvsr VRT,RA,RB - Load Vector for Shift Right
|
|
|
|
|
*
|
|
|
|
|
* Let the EA be the sum (rA|0)+(rB). Let sh=EA[28–31].
|
|
|
|
|
* Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
|
|
|
|
|
* Bytes (16-sh):(31-sh) of X are placed into vD.
|
|
|
|
|
*/
|
|
|
|
|
static void trans_lvsr(DisasContext *ctx)
|
|
|
|
|
{
|
|
|
|
|
int VT = rD(ctx->opcode);
|
|
|
|
|
TCGv_i64 result = tcg_temp_new_i64();
|
|
|
|
|
TCGv_i64 sh = tcg_temp_new_i64();
|
|
|
|
|
TCGv EA = tcg_temp_new();
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Get sh(from description) by anding EA with 0xf. */
|
|
|
|
|
gen_addr_reg_index(ctx, EA);
|
|
|
|
|
tcg_gen_extu_tl_i64(sh, EA);
|
|
|
|
|
tcg_gen_andi_i64(sh, sh, 0xfULL);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Create bytes (16-sh):(23-sh) of X(from description) and place them in
|
|
|
|
|
* higher doubleword of vD.
|
|
|
|
|
*/
|
|
|
|
|
tcg_gen_muli_i64(sh, sh, 0x0101010101010101ULL);
|
|
|
|
|
tcg_gen_subfi_i64(result, 0x1011121314151617ULL, sh);
|
|
|
|
|
set_avr64(VT, result, true);
|
|
|
|
|
/*
|
|
|
|
|
* Create bytes (24-sh):(32-sh) of X(from description) and place them in
|
|
|
|
|
* lower doubleword of vD.
|
|
|
|
|
*/
|
|
|
|
|
tcg_gen_subfi_i64(result, 0x18191a1b1c1d1e1fULL, sh);
|
|
|
|
|
set_avr64(VT, result, false);
|
|
|
|
|
}
|
|
|
|
|
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of Altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
/*
|
|
|
|
|
* vsl VRT,VRA,VRB - Vector Shift Left
|
|
|
|
|
*
|
|
|
|
|
* Shifting left 128 bit value of vA by value specified in bits 125-127 of vB.
|
|
|
|
|
* Lowest 3 bits in each byte element of register vB must be identical or
|
|
|
|
|
* result is undefined.
|
|
|
|
|
*/
|
|
|
|
|
static void trans_vsl(DisasContext *ctx)
|
|
|
|
|
{
|
|
|
|
|
int VT = rD(ctx->opcode);
|
|
|
|
|
int VA = rA(ctx->opcode);
|
|
|
|
|
int VB = rB(ctx->opcode);
|
2019-10-04 16:43:59 +03:00
|
|
|
|
TCGv_i64 avr = tcg_temp_new_i64();
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of Altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
TCGv_i64 sh = tcg_temp_new_i64();
|
2019-10-04 16:43:59 +03:00
|
|
|
|
TCGv_i64 carry = tcg_temp_new_i64();
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of Altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
TCGv_i64 tmp = tcg_temp_new_i64();
|
|
|
|
|
|
2019-10-04 16:43:59 +03:00
|
|
|
|
/* Place bits 125-127 of vB in 'sh'. */
|
|
|
|
|
get_avr64(avr, VB, false);
|
|
|
|
|
tcg_gen_andi_i64(sh, avr, 0x07ULL);
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of Altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
|
|
|
|
|
/*
|
2019-10-04 16:43:59 +03:00
|
|
|
|
* Save highest 'sh' bits of lower doubleword element of vA in variable
|
|
|
|
|
* 'carry' and perform shift on lower doubleword.
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of Altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
*/
|
2019-10-04 16:43:59 +03:00
|
|
|
|
get_avr64(avr, VA, false);
|
|
|
|
|
tcg_gen_subfi_i64(tmp, 32, sh);
|
|
|
|
|
tcg_gen_shri_i64(carry, avr, 32);
|
|
|
|
|
tcg_gen_shr_i64(carry, carry, tmp);
|
|
|
|
|
tcg_gen_shl_i64(avr, avr, sh);
|
|
|
|
|
set_avr64(VT, avr, false);
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Perform shift on higher doubleword element of vA and replace lowest
|
2019-10-04 16:43:59 +03:00
|
|
|
|
* 'sh' bits with 'carry'.
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
*/
|
2019-10-04 16:43:59 +03:00
|
|
|
|
get_avr64(avr, VA, true);
|
|
|
|
|
tcg_gen_shl_i64(avr, avr, sh);
|
|
|
|
|
tcg_gen_or_i64(avr, avr, carry);
|
|
|
|
|
set_avr64(VT, avr, true);
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* vsr VRT,VRA,VRB - Vector Shift Right
|
|
|
|
|
*
|
|
|
|
|
* Shifting right 128 bit value of vA by value specified in bits 125-127 of vB.
|
|
|
|
|
* Lowest 3 bits in each byte element of register vB must be identical or
|
|
|
|
|
* result is undefined.
|
|
|
|
|
*/
|
|
|
|
|
static void trans_vsr(DisasContext *ctx)
|
|
|
|
|
{
|
|
|
|
|
int VT = rD(ctx->opcode);
|
|
|
|
|
int VA = rA(ctx->opcode);
|
|
|
|
|
int VB = rB(ctx->opcode);
|
2019-10-04 16:43:59 +03:00
|
|
|
|
TCGv_i64 avr = tcg_temp_new_i64();
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr(Vector Shift Left/Rigt).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in odred not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
TCGv_i64 sh = tcg_temp_new_i64();
|
2019-10-04 16:43:59 +03:00
|
|
|
|
TCGv_i64 carry = tcg_temp_new_i64();
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr(Vector Shift Left/Rigt).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in odred not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
TCGv_i64 tmp = tcg_temp_new_i64();
|
|
|
|
|
|
2019-10-04 16:43:59 +03:00
|
|
|
|
/* Place bits 125-127 of vB in 'sh'. */
|
|
|
|
|
get_avr64(avr, VB, false);
|
|
|
|
|
tcg_gen_andi_i64(sh, avr, 0x07ULL);
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr(Vector Shift Left/Rigt).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in odred not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
|
|
|
|
|
/*
|
2019-10-04 16:43:59 +03:00
|
|
|
|
* Save lowest 'sh' bits of higher doubleword element of vA in variable
|
|
|
|
|
* 'carry' and perform shift on higher doubleword.
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr(Vector Shift Left/Rigt).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in odred not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
*/
|
2019-10-04 16:43:59 +03:00
|
|
|
|
get_avr64(avr, VA, true);
|
|
|
|
|
tcg_gen_subfi_i64(tmp, 32, sh);
|
|
|
|
|
tcg_gen_shli_i64(carry, avr, 32);
|
|
|
|
|
tcg_gen_shl_i64(carry, carry, tmp);
|
|
|
|
|
tcg_gen_shr_i64(avr, avr, sh);
|
|
|
|
|
set_avr64(VT, avr, true);
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr(Vector Shift Left/Rigt).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in odred not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
/*
|
|
|
|
|
* Perform shift on lower doubleword element of vA and replace highest
|
2019-10-04 16:43:59 +03:00
|
|
|
|
* 'sh' bits with 'carry'.
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr(Vector Shift Left/Rigt).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in odred not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
*/
|
2019-10-04 16:43:59 +03:00
|
|
|
|
get_avr64(avr, VA, false);
|
|
|
|
|
tcg_gen_shr_i64(avr, avr, sh);
|
|
|
|
|
tcg_gen_or_i64(avr, avr, carry);
|
|
|
|
|
set_avr64(VT, avr, false);
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr(Vector Shift Left/Rigt).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in odred not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
}
|
|
|
|
|
|
target/ppc: Optimize emulation of vgbbd instruction
Optimize altivec instruction vgbbd (Vector Gather Bits by Bytes by Doubleword)
All ith bits (i in range 1 to 8) of each byte of doubleword element in
source register are concatenated and placed into ith byte of appropriate
doubleword element in destination register.
Following solution is done for both doubleword elements of source register
in parallel, in order to reduce the number of instructions needed(that's why
arrays are used):
First, both doubleword elements of source register vB are placed in
appropriate element of array avr. Bits are gathered in 2x8 iterations(2 for
loops). In first iteration bit 1 of byte 1, bit 2 of byte 2,... bit 8 of
byte 8 are in their final spots so avr[i], i={0,1} can be and-ed with
tcg_mask. For every following iteration, both avr[i] and tcg_mask variables
have to be shifted right for 7 and 8 places, respectively, in order to get
bit 1 of byte 2, bit 2 of byte 3.. bit 7 of byte 8 in their final spots so
shifted avr values(saved in tmp) can be and-ed with new value of tcg_mask...
After the first 8 iterations (first loop), all the first bits are in their final
places, all second bits but the second bit from the eighth byte are in their places...
only 1 eighth bit from the eighth byte is in its place). In the second loop we do all
operations symmetrically, in order to get the other half of bits in their final
spots. Results for first and second doubleword elements are saved in
result[0] and result[1] respectively. In the end those results are saved in
appropriate doubleword element of destination register vD.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-5-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:50 +03:00
|
|
|
|
/*
|
|
|
|
|
* vgbbd VRT,VRB - Vector Gather Bits by Bytes by Doubleword
|
|
|
|
|
*
|
|
|
|
|
* All ith bits (i in range 1 to 8) of each byte of doubleword element in source
|
|
|
|
|
* register are concatenated and placed into ith byte of appropriate doubleword
|
|
|
|
|
* element in destination register.
|
|
|
|
|
*
|
|
|
|
|
* Following solution is done for both doubleword elements of source register
|
|
|
|
|
* in parallel, in order to reduce the number of instructions needed(that's why
|
|
|
|
|
* arrays are used):
|
|
|
|
|
* First, both doubleword elements of source register vB are placed in
|
|
|
|
|
* appropriate element of array avr. Bits are gathered in 2x8 iterations(2 for
|
|
|
|
|
* loops). In first iteration bit 1 of byte 1, bit 2 of byte 2,... bit 8 of
|
|
|
|
|
* byte 8 are in their final spots so avr[i], i={0,1} can be and-ed with
|
|
|
|
|
* tcg_mask. For every following iteration, both avr[i] and tcg_mask variables
|
|
|
|
|
* have to be shifted right for 7 and 8 places, respectively, in order to get
|
|
|
|
|
* bit 1 of byte 2, bit 2 of byte 3.. bit 7 of byte 8 in their final spots so
|
|
|
|
|
* shifted avr values(saved in tmp) can be and-ed with new value of tcg_mask...
|
|
|
|
|
* After first 8 iteration(first loop), all the first bits are in their final
|
|
|
|
|
* places, all second bits but second bit from eight byte are in their places...
|
|
|
|
|
* only 1 eight bit from eight byte is in it's place). In second loop we do all
|
|
|
|
|
* operations symmetrically, in order to get other half of bits in their final
|
|
|
|
|
* spots. Results for first and second doubleword elements are saved in
|
|
|
|
|
* result[0] and result[1] respectively. In the end those results are saved in
|
|
|
|
|
* appropriate doubleword element of destination register vD.
|
|
|
|
|
*/
|
|
|
|
|
static void trans_vgbbd(DisasContext *ctx)
|
|
|
|
|
{
|
|
|
|
|
int VT = rD(ctx->opcode);
|
|
|
|
|
int VB = rB(ctx->opcode);
|
|
|
|
|
TCGv_i64 tmp = tcg_temp_new_i64();
|
|
|
|
|
uint64_t mask = 0x8040201008040201ULL;
|
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
|
|
TCGv_i64 result[2];
|
|
|
|
|
result[0] = tcg_temp_new_i64();
|
|
|
|
|
result[1] = tcg_temp_new_i64();
|
|
|
|
|
TCGv_i64 avr[2];
|
|
|
|
|
avr[0] = tcg_temp_new_i64();
|
|
|
|
|
avr[1] = tcg_temp_new_i64();
|
|
|
|
|
TCGv_i64 tcg_mask = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
tcg_gen_movi_i64(tcg_mask, mask);
|
|
|
|
|
for (j = 0; j < 2; j++) {
|
|
|
|
|
get_avr64(avr[j], VB, j);
|
|
|
|
|
tcg_gen_and_i64(result[j], avr[j], tcg_mask);
|
|
|
|
|
}
|
|
|
|
|
for (i = 1; i < 8; i++) {
|
|
|
|
|
tcg_gen_movi_i64(tcg_mask, mask >> (i * 8));
|
|
|
|
|
for (j = 0; j < 2; j++) {
|
|
|
|
|
tcg_gen_shri_i64(tmp, avr[j], i * 7);
|
|
|
|
|
tcg_gen_and_i64(tmp, tmp, tcg_mask);
|
|
|
|
|
tcg_gen_or_i64(result[j], result[j], tmp);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for (i = 1; i < 8; i++) {
|
|
|
|
|
tcg_gen_movi_i64(tcg_mask, mask << (i * 8));
|
|
|
|
|
for (j = 0; j < 2; j++) {
|
|
|
|
|
tcg_gen_shli_i64(tmp, avr[j], i * 7);
|
|
|
|
|
tcg_gen_and_i64(tmp, tmp, tcg_mask);
|
|
|
|
|
tcg_gen_or_i64(result[j], result[j], tmp);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
for (j = 0; j < 2; j++) {
|
|
|
|
|
set_avr64(VT, result[j], j);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-15 17:22:52 +03:00
|
|
|
|
/*
|
|
|
|
|
* vclzw VRT,VRB - Vector Count Leading Zeros Word
|
|
|
|
|
*
|
|
|
|
|
* Counting the number of leading zero bits of each word element in source
|
|
|
|
|
* register and placing result in appropriate word element of destination
|
|
|
|
|
* register.
|
|
|
|
|
*/
|
|
|
|
|
static void trans_vclzw(DisasContext *ctx)
|
|
|
|
|
{
|
|
|
|
|
int VT = rD(ctx->opcode);
|
|
|
|
|
int VB = rB(ctx->opcode);
|
|
|
|
|
TCGv_i32 tmp = tcg_temp_new_i32();
|
|
|
|
|
int i;
|
|
|
|
|
|
|
|
|
|
/* Perform count for every word element using tcg_gen_clzi_i32. */
|
|
|
|
|
for (i = 0; i < 4; i++) {
|
|
|
|
|
tcg_gen_ld_i32(tmp, cpu_env,
|
|
|
|
|
offsetof(CPUPPCState, vsr[32 + VB].u64[0]) + i * 4);
|
|
|
|
|
tcg_gen_clzi_i32(tmp, tmp, 32);
|
|
|
|
|
tcg_gen_st_i32(tmp, cpu_env,
|
|
|
|
|
offsetof(CPUPPCState, vsr[32 + VT].u64[0]) + i * 4);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-15 17:22:51 +03:00
|
|
|
|
/*
|
|
|
|
|
* vclzd VRT,VRB - Vector Count Leading Zeros Doubleword
|
|
|
|
|
*
|
|
|
|
|
* Counting the number of leading zero bits of each doubleword element in source
|
|
|
|
|
* register and placing result in appropriate doubleword element of destination
|
|
|
|
|
* register.
|
|
|
|
|
*/
|
|
|
|
|
static void trans_vclzd(DisasContext *ctx)
|
|
|
|
|
{
|
|
|
|
|
int VT = rD(ctx->opcode);
|
|
|
|
|
int VB = rB(ctx->opcode);
|
|
|
|
|
TCGv_i64 avr = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
/* high doubleword */
|
|
|
|
|
get_avr64(avr, VB, true);
|
|
|
|
|
tcg_gen_clzi_i64(avr, avr, 64);
|
|
|
|
|
set_avr64(VT, avr, true);
|
|
|
|
|
|
|
|
|
|
/* low doubleword */
|
|
|
|
|
get_avr64(avr, VB, false);
|
|
|
|
|
tcg_gen_clzi_i64(avr, avr, 64);
|
|
|
|
|
set_avr64(VT, avr, false);
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-02 02:43:39 +03:00
|
|
|
|
/*
 * Translators instantiated from the GEN_VXFORM* generator macros.
 * NOTE(review): the two trailing integers are presumably the opc2/opc3
 * opcode fields, as for GEN_VR_LDX(name, opc2, opc3) -- confirm against
 * the macro definitions.
 */
GEN_VXFORM_V(vmuluwm, MO_32, tcg_gen_gvec_mul, 4, 2);
GEN_VXFORM(vsrv, 2, 28);
GEN_VXFORM(vslv, 2, 29);
GEN_VXFORM(vslo, 6, 16);
GEN_VXFORM(vsro, 6, 17);
|
2019-02-15 13:00:57 +03:00
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Common translator for VX-form instructions that map directly onto a
 * three-operand gvec expander.
 *
 * @ctx:      disassembly context
 * @a:        decoded VX-form arguments (vrt, vra, vrb register numbers)
 * @vece:     element size passed through to @gen_gvec
 * @gen_gvec: expander called with the offsets of vrt, vra and vrb and
 *            16 for both of its size arguments
 *
 * Returns true once the expansion has been emitted; REQUIRE_VECTOR() may
 * leave the function earlier.
 */
static bool do_vector_gvec3_VX(DisasContext *ctx, arg_VX *a, int vece,
                               void (*gen_gvec)(unsigned, uint32_t, uint32_t,
                                                uint32_t, uint32_t, uint32_t))
{
    uint32_t dst_ofs, src_a_ofs, src_b_ofs;

    REQUIRE_VECTOR(ctx);

    dst_ofs = avr_full_offset(a->vrt);
    src_a_ofs = avr_full_offset(a->vra);
    src_b_ofs = avr_full_offset(a->vrb);

    /* Operate on the whole 16-byte register. */
    gen_gvec(vece, dst_ofs, src_a_ofs, src_b_ofs, 16, 16);

    return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Per-element vector shifts, all routed through do_vector_gvec3_VX with
 * the element size and the matching variable-shift gvec expander.
 * NOTE(review): the first macro argument is presumably the instruction
 * flag that gates the translator (ALTIVEC for byte/halfword/word,
 * ALTIVEC_207 for doubleword) -- confirm against TRANS_FLAGS/TRANS_FLAGS2.
 */

/* Shift left */
TRANS_FLAGS(ALTIVEC, VSLB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_shlv);
TRANS_FLAGS(ALTIVEC, VSLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_shlv);
TRANS_FLAGS(ALTIVEC, VSLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_shlv);
TRANS_FLAGS2(ALTIVEC_207, VSLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_shlv);

/* Shift right (tcg_gen_gvec_shrv) */
TRANS_FLAGS(ALTIVEC, VSRB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_shrv);
TRANS_FLAGS(ALTIVEC, VSRH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_shrv);
TRANS_FLAGS(ALTIVEC, VSRW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_shrv);
TRANS_FLAGS2(ALTIVEC_207, VSRD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_shrv);

/* Shift right algebraic (tcg_gen_gvec_sarv) */
TRANS_FLAGS(ALTIVEC, VSRAB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_sarv);
TRANS_FLAGS(ALTIVEC, VSRAH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_sarv);
TRANS_FLAGS(ALTIVEC, VSRAW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_sarv);
TRANS_FLAGS2(ALTIVEC_207, VSRAD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_sarv);
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VRLB, do_vector_gvec3_VX, MO_8, tcg_gen_gvec_rotlv)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VRLH, do_vector_gvec3_VX, MO_16, tcg_gen_gvec_rotlv)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VRLW, do_vector_gvec3_VX, MO_32, tcg_gen_gvec_rotlv)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VRLD, do_vector_gvec3_VX, MO_64, tcg_gen_gvec_rotlv)
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Build the per-element mask used by the rotate-then-mask expansions.
 *
 * For every element of @vrb a begin position b is taken from the element
 * shifted right by 16 and an end position e from the element shifted right
 * by 8, each masked to (element width - 1).  The mask has ones from
 * position b through position e, counting from the most significant bit:
 *
 *     (~0 >> b) ^ ((~0 >> e) >> 1)
 *
 * and is inverted in the elements where b > e.
 *
 * Returns a new vector temporary holding the mask.
 */
static TCGv_vec do_vrl_mask_vec(unsigned vece, TCGv_vec vrb)
{
    TCGv_vec mask = tcg_temp_new_vec_matching(vrb);
    TCGv_vec end = tcg_temp_new_vec_matching(vrb);
    TCGv_vec aux = tcg_temp_new_vec_matching(vrb);
    TCGv_vec ones = tcg_constant_vec_matching(vrb, vece, -1);

    /* aux <- (element width - 1), the field mask for b and e. */
    tcg_gen_dupi_vec(vece, aux, (8 << vece) - 1);

    /* 'mask' temporarily holds b. */
    tcg_gen_shri_vec(vece, mask, vrb, 16);
    tcg_gen_and_vec(vece, mask, mask, aux);

    /* 'end' holds e. */
    tcg_gen_shri_vec(vece, end, vrb, 8);
    tcg_gen_and_vec(vece, end, end, aux);

    /* aux <- b > e; XOR-ed in at the end to invert those elements. */
    tcg_gen_cmp_vec(TCG_COND_GT, vece, aux, mask, end);

    /* mask <- (~0 >> b) ^ ((~0 >> e) >> 1) */
    tcg_gen_shrv_vec(vece, mask, ones, mask);
    tcg_gen_shrv_vec(vece, end, ones, end);
    tcg_gen_shri_vec(vece, end, end, 1);
    tcg_gen_xor_vec(vece, mask, mask, end);

    /* Invert the mask where begin > end. */
    tcg_gen_xor_vec(vece, mask, mask, aux);

    return mask;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Inline vector expansion of "rotate left then AND with mask".
 *
 * Each element of @vra is rotated left by the low bits of the matching
 * element of @vrb (masked to element width - 1), ANDed with the mask
 * produced by do_vrl_mask_vec() from @vrb, and written to @vrt.
 */
static void gen_vrlnm_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
                          TCGv_vec vrb)
{
    TCGv_vec amount = tcg_temp_new_vec_matching(vrt);
    TCGv_vec keep;

    /* Mask selecting the bits of the rotated value that are kept. */
    keep = do_vrl_mask_vec(vece, vrb);

    /* amount <- vrb & (element width - 1) */
    tcg_gen_dupi_vec(vece, amount, (8 << vece) - 1);
    tcg_gen_and_vec(vece, amount, vrb, amount);

    /* vrt <- rotl(vra, amount) & keep */
    tcg_gen_rotlv_vec(vece, vrt, vra, amount);
    tcg_gen_and_vec(vece, vrt, vrt, keep);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Translate VRLWNM / VRLDNM (vector rotate left then AND with mask).
 *
 * @vece selects the element size and must be MO_32 or MO_64; it indexes
 * the two-entry expansion table below.
 *
 * Returns true once the expansion has been emitted; the REQUIRE_* checks
 * may leave the function earlier.
 */
static bool do_vrlnm(DisasContext *ctx, arg_VX *a, int vece)
{
    /* Vector opcodes listed for the inline (.fniv) expansion. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
    };
    /* [0] is the word variant, [1] the doubleword variant. */
    static const GVecGen3 ops[2] = {
        [0] = {
            .fniv = gen_vrlnm_vec,
            .fno = gen_helper_VRLWNM,
            .opt_opc = vecop_list,
            .load_dest = true,
            .vece = MO_32
        },
        [1] = {
            .fniv = gen_vrlnm_vec,
            .fno = gen_helper_VRLDNM,
            .opt_opc = vecop_list,
            .load_dest = true,
            .vece = MO_64
        }
    };
    const GVecGen3 *op;

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    /*
     * NOTE(review): the other translators in this part of the file
     * (e.g. do_vector_gvec3_VX) gate on REQUIRE_VECTOR; confirm that
     * REQUIRE_VSX is really what is intended here.
     */
    REQUIRE_VSX(ctx);

    /* Map vece to the table index: the table starts at the word entry. */
    op = &ops[vece - 2];
    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
                   avr_full_offset(a->vrb), 16, 16, op);

    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS(VRLWNM, do_vrlnm, MO_32)
|
|
|
|
|
TRANS(VRLDNM, do_vrlnm, MO_64)
|
|
|
|
|
|
|
|
|
|
/*
 * Inline vector expansion of "rotate left then mask insert".
 *
 * Each element of @vra is rotated left by the low bits of the matching
 * element of @vrb (masked to element width - 1).  Where the mask produced
 * by do_vrl_mask_vec() from @vrb is set, the rotated bits replace the bits
 * of @vrt; the other bits of @vrt are kept.  @vrt is therefore both an
 * input and the output.
 */
static void gen_vrlmi_vec(unsigned vece, TCGv_vec vrt, TCGv_vec vra,
                          TCGv_vec vrb)
{
    TCGv_vec amount = tcg_temp_new_vec_matching(vrt);
    TCGv_vec rotated = tcg_temp_new_vec_matching(vrt);
    TCGv_vec insert;

    /* Mask selecting the bits to be taken from the rotated value. */
    insert = do_vrl_mask_vec(vece, vrb);

    /* amount <- vrb & (element width - 1) */
    tcg_gen_dupi_vec(vece, amount, (8 << vece) - 1);
    tcg_gen_and_vec(vece, amount, vrb, amount);

    /* vrt <- (rotl(vra, amount) & insert) | (vrt & ~insert) */
    tcg_gen_rotlv_vec(vece, rotated, vra, amount);
    tcg_gen_bitsel_vec(vece, vrt, insert, rotated, vrt);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Translate VRLWMI / VRLDMI (vector rotate left then mask insert).
 *
 * @vece selects the element size and must be MO_32 or MO_64; it indexes
 * the two-entry expansion table below.  Both entries set .load_dest
 * because the previous contents of vrt are an input of the insert.
 *
 * Returns true once the expansion has been emitted; the REQUIRE_* checks
 * may leave the function earlier.
 */
static bool do_vrlmi(DisasContext *ctx, arg_VX *a, int vece)
{
    /* Vector opcodes listed for the inline (.fniv) expansion. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_cmp_vec, INDEX_op_rotlv_vec, INDEX_op_sari_vec,
        INDEX_op_shli_vec, INDEX_op_shri_vec, INDEX_op_shrv_vec, 0
    };
    static const GVecGen3 ops[2] = {
        {
            .fniv = gen_vrlmi_vec,
            .fno = gen_helper_VRLWMI,
            .opt_opc = vecop_list,
            .load_dest = true,
            .vece = MO_32
        },
        {
            /*
             * Must be the mask-insert expansion, like the word variant:
             * gen_vrlnm_vec would AND the rotated value with the mask and
             * drop the destination bits outside it, disagreeing with the
             * out-of-line gen_helper_VRLDMI.
             */
            .fniv = gen_vrlmi_vec,
            .fno = gen_helper_VRLDMI,
            .opt_opc = vecop_list,
            .load_dest = true,
            .vece = MO_64
        }
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    /*
     * NOTE(review): the other translators in this part of the file
     * (e.g. do_vector_gvec3_VX) gate on REQUIRE_VECTOR; confirm that
     * REQUIRE_VSX is really what is intended here.
     */
    REQUIRE_VSX(ctx);

    /* vece - 2 maps MO_32 to entry 0 and MO_64 to entry 1. */
    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
                   avr_full_offset(a->vrb), 16, 16, &ops[vece - 2]);

    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS(VRLWMI, do_vrlmi, MO_32)
|
|
|
|
|
TRANS(VRLDMI, do_vrlmi, MO_64)
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * 128-bit shift of VR[vra] by the low 7 bits of the high doubleword of
 * VR[vrb], result in VR[vrt].
 *
 * @right: shift right instead of left.
 * @alg:   (right shifts only) replicate the sign bit instead of filling
 *         with zeroes.
 *
 * The shift is done in two stages: bit 6 of the count selects a whole
 * doubleword move, then the remaining 0..63 bits are applied to the pair.
 */
static bool do_vector_shift_quad(DisasContext *ctx, arg_VX *a, bool right,
                                 bool alg)
{
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 sh, src_hi, src_lo, tmp, fill;

    REQUIRE_VECTOR(ctx);

    sh = tcg_temp_new_i64();
    src_hi = tcg_temp_new_i64();
    src_lo = tcg_temp_new_i64();
    tmp = tcg_temp_new_i64();

    get_avr64(src_lo, a->vra, false);
    get_avr64(src_hi, a->vra, true);

    get_avr64(sh, a->vrb, true);

    /* Stage 1: when the count is >= 64, move one doubleword across. */
    tcg_gen_andi_i64(tmp, sh, 64);
    if (!right) {
        tcg_gen_movcond_i64(TCG_COND_NE, src_hi, tmp, zero, src_lo, src_hi);
        tcg_gen_movcond_i64(TCG_COND_NE, src_lo, tmp, zero, zero, src_lo);
    } else {
        tcg_gen_movcond_i64(TCG_COND_NE, src_lo, tmp, zero, src_hi, src_lo);
        if (alg) {
            /* Vacated high doubleword becomes copies of the sign bit. */
            fill = tcg_temp_new_i64();
            tcg_gen_sari_i64(fill, src_lo, 63);
        } else {
            fill = zero;
        }
        tcg_gen_movcond_i64(TCG_COND_NE, src_hi, tmp, zero, fill, src_hi);
    }
    tcg_gen_andi_i64(sh, sh, 0x3F);

    /* Stage 2a: the doubleword that only loses bits. */
    if (!right) {
        tcg_gen_shl_i64(tmp, src_lo, sh);
    } else if (alg) {
        tcg_gen_sar_i64(tmp, src_hi, sh);
    } else {
        tcg_gen_shr_i64(tmp, src_hi, sh);
    }
    set_avr64(a->vrt, tmp, right);

    /*
     * Stage 2b: the doubleword that also receives bits from its neighbour.
     * The carried bits are produced by shifting the neighbour the other way
     * by (63 - sh) and then by 1, which stays well defined when sh == 0.
     */
    if (right) {
        tcg_gen_shr_i64(src_lo, src_lo, sh);
        tcg_gen_xori_i64(sh, sh, 63);
        tcg_gen_shl_i64(src_hi, src_hi, sh);
        tcg_gen_shli_i64(src_hi, src_hi, 1);
    } else {
        tcg_gen_shl_i64(src_hi, src_hi, sh);
        tcg_gen_xori_i64(sh, sh, 63);
        tcg_gen_shr_i64(src_lo, src_lo, sh);
        tcg_gen_shri_i64(src_lo, src_lo, 1);
    }
    tcg_gen_or_i64(src_hi, src_hi, src_lo);
    set_avr64(a->vrt, src_hi, !right);
    return true;
}

TRANS_FLAGS2(ISA310, VSLQ, do_vector_shift_quad, false, false);
TRANS_FLAGS2(ISA310, VSRQ, do_vector_shift_quad, true, false);
TRANS_FLAGS2(ISA310, VSRAQ, do_vector_shift_quad, true, true);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Build the 128-bit mask used by the quadword rotate-and-mask instructions.
 *
 * @mh, @ml: receive the high and low doublewords of the mask.
 * @b, @e:   begin / end positions; each is split into bit 6 (doubleword
 *           select) and bits 0..5 (shift within the pair).
 *
 * The mask is computed as (~0 >> b) ^ ((~0 >> e) >> 1) over 128 bits and is
 * inverted when b > e.
 */
static void do_vrlq_mask(TCGv_i64 mh, TCGv_i64 ml, TCGv_i64 b, TCGv_i64 e)
{
    TCGv_i64 zero = tcg_constant_i64(0);
    TCGv_i64 ones = tcg_constant_i64(-1);
    TCGv_i64 end_hi = tcg_temp_new_i64();
    TCGv_i64 end_lo = tcg_temp_new_i64();
    TCGv_i64 amt = tcg_temp_new_i64();
    TCGv_i64 top = tcg_temp_new_i64();

    /* (mh:ml) = ~0 >> b */
    tcg_gen_andi_i64(amt, b, 64);
    /* High doubleword before the sub-64 shift: zero when b >= 64 */
    tcg_gen_movcond_i64(TCG_COND_NE, top, amt, zero, zero, ones);
    tcg_gen_andi_i64(amt, b, 0x3F);
    tcg_gen_shr_i64(mh, top, amt);
    tcg_gen_shr_i64(ml, ones, amt);
    /* Bits leaving the high doubleword: top << (64 - amt), in two steps */
    tcg_gen_xori_i64(amt, amt, 63);
    tcg_gen_shl_i64(top, top, amt);
    tcg_gen_shli_i64(top, top, 1);
    tcg_gen_or_i64(ml, top, ml);

    /* (end_hi:end_lo) = ~0 >> e, same recipe */
    tcg_gen_andi_i64(amt, e, 64);
    tcg_gen_movcond_i64(TCG_COND_NE, top, amt, zero, zero, ones);
    tcg_gen_andi_i64(amt, e, 0x3F);
    tcg_gen_shr_i64(end_hi, top, amt);
    tcg_gen_shr_i64(end_lo, ones, amt);
    tcg_gen_xori_i64(amt, amt, 63);
    tcg_gen_shl_i64(top, top, amt);
    tcg_gen_shli_i64(top, top, 1);
    tcg_gen_or_i64(end_lo, top, end_lo);

    /* (end_hi:end_lo) >>= 1 */
    tcg_gen_extract2_i64(end_lo, end_lo, end_hi, 1);
    tcg_gen_shri_i64(end_hi, end_hi, 1);

    /* mask = (~0 >> b) ^ ((~0 >> e) >> 1) */
    tcg_gen_xor_i64(mh, mh, end_hi);
    tcg_gen_xor_i64(ml, ml, end_lo);

    /* Invert the whole mask when begin > end */
    tcg_gen_movcond_i64(TCG_COND_GT, amt, b, e, ones, zero);

    tcg_gen_xor_i64(mh, mh, amt);
    tcg_gen_xor_i64(ml, ml, amt);
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
static bool do_vector_rotl_quad(DisasContext *ctx, arg_VX *a, bool mask,
|
|
|
|
|
bool insert)
|
2022-03-02 08:51:37 +03:00
|
|
|
|
{
|
|
|
|
|
TCGv_i64 ah, al, vrb, n, t0, t1, zero = tcg_constant_i64(0);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
|
|
|
|
|
|
|
|
|
|
ah = tcg_temp_new_i64();
|
|
|
|
|
al = tcg_temp_new_i64();
|
2022-03-02 08:51:37 +03:00
|
|
|
|
vrb = tcg_temp_new_i64();
|
2022-03-02 08:51:37 +03:00
|
|
|
|
n = tcg_temp_new_i64();
|
|
|
|
|
t0 = tcg_temp_new_i64();
|
|
|
|
|
t1 = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
get_avr64(ah, a->vra, true);
|
|
|
|
|
get_avr64(al, a->vra, false);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
get_avr64(vrb, a->vrb, true);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
tcg_gen_mov_i64(t0, ah);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
tcg_gen_andi_i64(t1, vrb, 64);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_NE, ah, t1, zero, al, ah);
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_NE, al, t1, zero, t0, al);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
tcg_gen_andi_i64(n, vrb, 0x3F);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
tcg_gen_shl_i64(t0, ah, n);
|
|
|
|
|
tcg_gen_shl_i64(t1, al, n);
|
|
|
|
|
|
|
|
|
|
tcg_gen_xori_i64(n, n, 63);
|
|
|
|
|
|
|
|
|
|
tcg_gen_shr_i64(al, al, n);
|
|
|
|
|
tcg_gen_shri_i64(al, al, 1);
|
|
|
|
|
tcg_gen_or_i64(t0, al, t0);
|
|
|
|
|
|
|
|
|
|
tcg_gen_shr_i64(ah, ah, n);
|
|
|
|
|
tcg_gen_shri_i64(ah, ah, 1);
|
|
|
|
|
tcg_gen_or_i64(t1, ah, t1);
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
if (mask || insert) {
|
2022-03-05 09:16:47 +03:00
|
|
|
|
tcg_gen_extract_i64(n, vrb, 8, 7);
|
|
|
|
|
tcg_gen_extract_i64(vrb, vrb, 16, 7);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
do_vrlq_mask(ah, al, vrb, n);
|
|
|
|
|
|
|
|
|
|
tcg_gen_and_i64(t0, t0, ah);
|
|
|
|
|
tcg_gen_and_i64(t1, t1, al);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
if (insert) {
|
|
|
|
|
get_avr64(n, a->vrt, true);
|
|
|
|
|
get_avr64(vrb, a->vrt, false);
|
2022-03-05 09:16:47 +03:00
|
|
|
|
tcg_gen_andc_i64(n, n, ah);
|
|
|
|
|
tcg_gen_andc_i64(vrb, vrb, al);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
tcg_gen_or_i64(t0, t0, n);
|
|
|
|
|
tcg_gen_or_i64(t1, t1, vrb);
|
|
|
|
|
}
|
2022-03-02 08:51:37 +03:00
|
|
|
|
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
set_avr64(a->vrt, t0, true);
|
|
|
|
|
set_avr64(a->vrt, t1, false);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
TRANS(VRLQ, do_vector_rotl_quad, false, false)
|
|
|
|
|
TRANS(VRLQNM, do_vector_rotl_quad, true, false)
|
|
|
|
|
TRANS(VRLQMI, do_vector_rotl_quad, false, true)
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
2019-02-15 13:00:57 +03:00
|
|
|
|
/*
 * Generate the translator for a saturating VX-form instruction.
 *
 * Two functions are emitted per instantiation:
 *  - gen_<NAME>_vec: computes both the plain (NORM) and saturating (SAT)
 *    result; wherever they differ, the corresponding lanes are ORed into
 *    the @sat accumulator.
 *  - gen_<NAME>: checks that Altivec is enabled and runs the 4-operand
 *    gvec expander with vscr_sat as the second, written-back operand.
 * OPC2 / OPC3 are not used by the expansion.
 */
#define GEN_VXFORM_SAT(NAME, VECE, NORM, SAT, OPC2, OPC3)               \
static void glue(glue(gen_, NAME), _vec)(unsigned vece, TCGv_vec t,     \
                                         TCGv_vec sat, TCGv_vec a,      \
                                         TCGv_vec b)                    \
{                                                                       \
    TCGv_vec plain = tcg_temp_new_vec_matching(t);                      \
    /* plain = NORM(a, b); t = SAT(a, b) */                             \
    glue(glue(tcg_gen_, NORM), _vec)(VECE, plain, a, b);                \
    glue(glue(tcg_gen_, SAT), _vec)(VECE, t, a, b);                     \
    /* lanes where the two results differ did saturate */               \
    tcg_gen_cmp_vec(TCG_COND_NE, VECE, plain, plain, t);                \
    tcg_gen_or_vec(VECE, sat, sat, plain);                              \
}                                                                       \
static void glue(gen_, NAME)(DisasContext *ctx)                         \
{                                                                       \
    static const TCGOpcode needed_vecops[] = {                          \
        glue(glue(INDEX_op_, NORM), _vec),                              \
        glue(glue(INDEX_op_, SAT), _vec),                               \
        INDEX_op_cmp_vec, 0                                             \
    };                                                                  \
    static const GVecGen4 expander = {                                  \
        .vece = VECE,                                                   \
        .write_aofs = true,                                             \
        .opt_opc = needed_vecops,                                       \
        .fniv = glue(glue(gen_, NAME), _vec),                           \
        .fno = glue(gen_helper_, NAME),                                 \
    };                                                                  \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    tcg_gen_gvec_4(avr_full_offset(rD(ctx->opcode)),                    \
                   offsetof(CPUPPCState, vscr_sat),                     \
                   avr_full_offset(rA(ctx->opcode)),                    \
                   avr_full_offset(rB(ctx->opcode)),                    \
                   16, 16, &expander);                                  \
}
|
|
|
|
|
|
|
|
|
|
/*
 * Saturating add/subtract translators.  GEN_VXFORM_SAT arguments are
 * (name, element size, plain vector op, saturating vector op, opc2, opc3).
 *
 * NOTE(review): GEN_VXFORM_DUAL / GEN_VXFORM_DUAL_EXT are defined outside
 * this part of the file; presumably they emit a dispatcher for two
 * instructions sharing an opcode slot (here vaddubs/vmul10uq and
 * vadduhs/vmul10euq) - confirm against their definitions.
 */
GEN_VXFORM_SAT(vaddubs, MO_8, add, usadd, 0, 8);
GEN_VXFORM_DUAL_EXT(vaddubs, PPC_ALTIVEC, PPC_NONE, 0,       \
                    vmul10uq, PPC_NONE, PPC2_ISA300, 0x0000F800)
GEN_VXFORM_SAT(vadduhs, MO_16, add, usadd, 0, 9);
GEN_VXFORM_DUAL(vadduhs, PPC_ALTIVEC, PPC_NONE, \
                vmul10euq, PPC_NONE, PPC2_ISA300)
GEN_VXFORM_SAT(vadduws, MO_32, add, usadd, 0, 10);
/* Signed saturating adds */
GEN_VXFORM_SAT(vaddsbs, MO_8, add, ssadd, 0, 12);
GEN_VXFORM_SAT(vaddshs, MO_16, add, ssadd, 0, 13);
GEN_VXFORM_SAT(vaddsws, MO_32, add, ssadd, 0, 14);
/* Unsigned saturating subtracts */
GEN_VXFORM_SAT(vsububs, MO_8, sub, ussub, 0, 24);
GEN_VXFORM_SAT(vsubuhs, MO_16, sub, ussub, 0, 25);
GEN_VXFORM_SAT(vsubuws, MO_32, sub, ussub, 0, 26);
/* Signed saturating subtracts */
GEN_VXFORM_SAT(vsubsbs, MO_8, sub, sssub, 0, 28);
GEN_VXFORM_SAT(vsubshs, MO_16, sub, sssub, 0, 29);
GEN_VXFORM_SAT(vsubsws, MO_32, sub, sssub, 0, 30);
|
target/ppc: Optimize emulation of vsl and vsr instructions
Optimization of altivec instructions vsl and vsr (Vector Shift Left/Right).
Perform shift operation (left and right respectively) on 128 bit value of
register vA by value specified in bits 125-127 of register vB. Lowest 3
bits in each byte element of register vB must be identical or result is
undefined.
For vsl instruction, the first step is bits 125-127 of register vB have
to be saved in variable sh. Then, the highest sh bits of the lower
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when shift operation is performed on
the lower doubleword element of register vA, which is the next
step. After shifting the lower doubleword element shift operation
is performed on higher doubleword element of vA, with replacement of
the lowest sh bits(that are now 0) with bits saved in shifted.
For vsr instruction, firstly, the bits 125-127 of register vB have
to be saved in variable sh. Then, the lowest sh bits of the higher
doubleword element of register vA are saved in variable shifted,
in order not to lose those bits when the shift operation is
performed on the higher doubleword element of register vA, which is
the next step. After shifting higher doubleword element, shift operation
is performed on lower doubleword element of vA, with replacement of
highest sh bits(that are now 0) with bits saved in shifted.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-3-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:48 +03:00
|
|
|
|
/*
 * VX-form translator instantiations.
 *
 * NOTE(review): the GEN_VXFORM* macros used below are defined outside this
 * part of the file.  The trailing numeric arguments are presumably the
 * (opc2, opc3) opcode fields, as in the sibling macros defined here -
 * confirm against their definitions.
 */
GEN_VXFORM_TRANS(vsl, 2, 7);
GEN_VXFORM_TRANS(vsr, 2, 11);
/* vpk* */
GEN_VXFORM_ENV(vpkuhum, 7, 0);
GEN_VXFORM_ENV(vpkuwum, 7, 1);
GEN_VXFORM_ENV(vpkudum, 7, 17);
GEN_VXFORM_ENV(vpkuhus, 7, 2);
GEN_VXFORM_ENV(vpkuwus, 7, 3);
GEN_VXFORM_ENV(vpkudus, 7, 19);
GEN_VXFORM_ENV(vpkshus, 7, 4);
GEN_VXFORM_ENV(vpkswus, 7, 5);
GEN_VXFORM_ENV(vpksdus, 7, 21);
GEN_VXFORM_ENV(vpkshss, 7, 6);
GEN_VXFORM_ENV(vpkswss, 7, 7);
GEN_VXFORM_ENV(vpksdss, 7, 23);
GEN_VXFORM(vpkpx, 7, 12);
/* vsum* */
GEN_VXFORM_ENV(vsum4ubs, 4, 24);
GEN_VXFORM_ENV(vsum4sbs, 4, 28);
GEN_VXFORM_ENV(vsum4shs, 4, 25);
GEN_VXFORM_ENV(vsum2sws, 4, 26);
GEN_VXFORM_ENV(vsumsws, 4, 30);
/* v*fp */
GEN_VXFORM_ENV(vaddfp, 5, 0);
GEN_VXFORM_ENV(vsubfp, 5, 1);
GEN_VXFORM_ENV(vmaxfp, 5, 16);
GEN_VXFORM_ENV(vminfp, 5, 17);
/* vextu*lx; vextuwlx is paired with vmrgow */
GEN_VXFORM_HETRO(vextublx, 6, 24)
GEN_VXFORM_HETRO(vextuhlx, 6, 25)
GEN_VXFORM_HETRO(vextuwlx, 6, 26)
GEN_VXFORM_TRANS_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
                vextuwlx, PPC_NONE, PPC2_ISA300)
/* vextu*rx; vextuwrx is paired with vmrgew */
GEN_VXFORM_HETRO(vextubrx, 6, 28)
GEN_VXFORM_HETRO(vextuhrx, 6, 29)
GEN_VXFORM_HETRO(vextuwrx, 6, 30)
GEN_VXFORM_TRANS(lvsl, 6, 31)
GEN_VXFORM_TRANS(lvsr, 6, 32)
GEN_VXFORM_TRANS_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207,
                vextuwrx, PPC_NONE, PPC2_ISA300)
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
|
|
|
|
/*
 * Emit gen_<name>(): a translator that raises the VPU-unavailable
 * exception when Altivec is disabled and otherwise calls
 * gen_helper_<opname>(cpu_env, rD, rA, rB) on pointers to the vector
 * registers named by the opcode.  str, opc2 and opc3 are not used by the
 * expansion.
 */
#define GEN_VXRFORM1(opname, name, str, opc2, opc3)                     \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src_a, src_b, dst;                                         \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    src_a = gen_avr_ptr(rA(ctx->opcode));                               \
    src_b = gen_avr_ptr(rB(ctx->opcode));                               \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##opname(cpu_env, dst, src_a, src_b);                    \
}
|
|
|
|
|
|
|
|
|
|
/*
 * Instantiate GEN_VXRFORM1 twice for one instruction:
 *  - gen_<name>,  calling helper <name>, with the given opc3;
 *  - gen_<name>_, calling helper <name>_dot, with bit 4 of opc3 set
 *    (the "." form, as the stringified name shows).
 */
#define GEN_VXRFORM(name, opc2, opc3)                                \
    GEN_VXRFORM1(name, name, #name, opc2, opc3)                      \
    GEN_VXRFORM1(name##_dot, name##_, #name ".", opc2, (opc3 | (0x1 << 4)))
|
|
|
|
|
|
|
|
|
|
/*
 * Support for Altivec instructions that use bit 31 (Rc) as an opcode
 * bit but also use bit 21 as an actual Rc bit.  In general, these pairs
 * come from different versions of the ISA, so a pair of instruction
 * flags is accepted for each of the two instructions.
 *
 * The generated gen_<name0>_<name1>() selects name0 when Rc == 0 and
 * name1 when Rc == 1, provided the matching flags are present; within
 * each, Rc21 picks the plain translator or the trailing-underscore
 * (record) one.  Anything else raises an invalid-instruction exception.
 */
#define GEN_VXRFORM_DUAL(name0, flg0, flg2_0, name1, flg1, flg2_1)     \
static void glue(gen_, name0##_##name1)(DisasContext *ctx)             \
{                                                                      \
    if ((Rc(ctx->opcode) == 0) &&                                      \
        ((ctx->insns_flags & flg0) || (ctx->insns_flags2 & flg2_0))) { \
        if (Rc21(ctx->opcode) != 0) {                                  \
            gen_##name0##_(ctx);                                       \
        } else {                                                       \
            gen_##name0(ctx);                                          \
        }                                                              \
        return;                                                        \
    }                                                                  \
    if ((Rc(ctx->opcode) == 1) &&                                      \
        ((ctx->insns_flags & flg1) || (ctx->insns_flags2 & flg2_1))) { \
        if (Rc21(ctx->opcode) != 0) {                                  \
            gen_##name1##_(ctx);                                       \
        } else {                                                       \
            gen_##name1(ctx);                                          \
        }                                                              \
        return;                                                        \
    }                                                                  \
    gen_inval_exception(ctx, POWERPC_EXCP_INVAL_INVAL);                \
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
static void do_vcmp_rc(int vrt)
|
|
|
|
|
{
|
|
|
|
|
TCGv_i64 tmp, set, clr;
|
|
|
|
|
|
|
|
|
|
tmp = tcg_temp_new_i64();
|
|
|
|
|
set = tcg_temp_new_i64();
|
|
|
|
|
clr = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
get_avr64(tmp, vrt, true);
|
|
|
|
|
tcg_gen_mov_i64(set, tmp);
|
|
|
|
|
get_avr64(tmp, vrt, false);
|
|
|
|
|
tcg_gen_or_i64(clr, set, tmp);
|
|
|
|
|
tcg_gen_and_i64(set, set, tmp);
|
|
|
|
|
|
|
|
|
|
tcg_gen_setcondi_i64(TCG_COND_EQ, clr, clr, 0);
|
|
|
|
|
tcg_gen_shli_i64(clr, clr, 1);
|
|
|
|
|
|
|
|
|
|
tcg_gen_setcondi_i64(TCG_COND_EQ, set, set, -1);
|
|
|
|
|
tcg_gen_shli_i64(set, set, 3);
|
|
|
|
|
|
|
|
|
|
tcg_gen_or_i64(tmp, set, clr);
|
|
|
|
|
tcg_gen_extrl_i64_i32(cpu_crf[6], tmp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool do_vcmp(DisasContext *ctx, arg_VC *a, TCGCond cond, int vece)
|
|
|
|
|
{
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
tcg_gen_gvec_cmp(cond, vece, avr_full_offset(a->vrt),
|
|
|
|
|
avr_full_offset(a->vra), avr_full_offset(a->vrb), 16, 16);
|
|
|
|
|
|
|
|
|
|
if (a->rc) {
|
|
|
|
|
do_vcmp_rc(a->vrt);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPEQUB, do_vcmp, TCG_COND_EQ, MO_8)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPEQUH, do_vcmp, TCG_COND_EQ, MO_16)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPEQUW, do_vcmp, TCG_COND_EQ, MO_32)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VCMPEQUD, do_vcmp, TCG_COND_EQ, MO_64)
|
|
|
|
|
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPGTSB, do_vcmp, TCG_COND_GT, MO_8)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPGTSH, do_vcmp, TCG_COND_GT, MO_16)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPGTSW, do_vcmp, TCG_COND_GT, MO_32)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VCMPGTSD, do_vcmp, TCG_COND_GT, MO_64)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPGTUB, do_vcmp, TCG_COND_GTU, MO_8)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPGTUH, do_vcmp, TCG_COND_GTU, MO_16)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VCMPGTUW, do_vcmp, TCG_COND_GTU, MO_32)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VCMPGTUD, do_vcmp, TCG_COND_GTU, MO_64)
|
|
|
|
|
|
|
|
|
|
TRANS_FLAGS2(ISA300, VCMPNEB, do_vcmp, TCG_COND_NE, MO_8)
|
|
|
|
|
TRANS_FLAGS2(ISA300, VCMPNEH, do_vcmp, TCG_COND_NE, MO_16)
|
|
|
|
|
TRANS_FLAGS2(ISA300, VCMPNEW, do_vcmp, TCG_COND_NE, MO_32)
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Inline vector expansion of "compare not equal or zero":
 * t = (a != b) | (a == 0) | (b == 0), element-wise.
 */
static void gen_vcmpnez_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec a_is_zero = tcg_temp_new_vec_matching(t);
    TCGv_vec b_is_zero = tcg_temp_new_vec_matching(t);
    TCGv_vec zero = tcg_constant_vec_matching(t, vece, 0);

    /* Evaluate all three predicates before t is written */
    tcg_gen_cmp_vec(TCG_COND_EQ, vece, a_is_zero, a, zero);
    tcg_gen_cmp_vec(TCG_COND_EQ, vece, b_is_zero, b, zero);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, t, a, b);

    /* Fold the zero tests into the result */
    tcg_gen_or_vec(vece, t, t, a_is_zero);
    tcg_gen_or_vec(vece, t, t, b_is_zero);
}
|
|
|
|
|
|
|
|
|
|
static bool do_vcmpnez(DisasContext *ctx, arg_VC *a, int vece)
|
|
|
|
|
{
|
|
|
|
|
static const TCGOpcode vecop_list[] = {
|
|
|
|
|
INDEX_op_cmp_vec, 0
|
|
|
|
|
};
|
|
|
|
|
static const GVecGen3 ops[3] = {
|
|
|
|
|
{
|
|
|
|
|
.fniv = gen_vcmpnez_vec,
|
|
|
|
|
.fno = gen_helper_VCMPNEZB,
|
|
|
|
|
.opt_opc = vecop_list,
|
|
|
|
|
.vece = MO_8
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
.fniv = gen_vcmpnez_vec,
|
|
|
|
|
.fno = gen_helper_VCMPNEZH,
|
|
|
|
|
.opt_opc = vecop_list,
|
|
|
|
|
.vece = MO_16
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
.fniv = gen_vcmpnez_vec,
|
|
|
|
|
.fno = gen_helper_VCMPNEZW,
|
|
|
|
|
.opt_opc = vecop_list,
|
|
|
|
|
.vece = MO_32
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA300);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
|
|
|
|
|
avr_full_offset(a->vrb), 16, 16, &ops[vece]);
|
|
|
|
|
|
|
|
|
|
if (a->rc) {
|
|
|
|
|
do_vcmp_rc(a->vrt);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VCMPNEZB, do_vcmpnez, MO_8)
|
|
|
|
|
TRANS(VCMPNEZH, do_vcmpnez, MO_16)
|
|
|
|
|
TRANS(VCMPNEZW, do_vcmpnez, MO_32)
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Vector Compare Equal Quadword: VR[vrt] becomes all ones when the 128-bit
 * values in VR[vra] and VR[vrb] are equal, all zeroes otherwise.  When the
 * instruction's rc field is set, CR6 becomes 8 (equal) or 2 (not equal).
 *
 * Like the other quadword compares in this file (see do_vcmpq), the
 * instruction is gated on ISA v3.1 support and on the vector facility
 * being enabled, so that it is rejected or raises the VPU-unavailable
 * exception instead of touching the vector registers unconditionally.
 */
static bool trans_VCMPEQUQ(DisasContext *ctx, arg_VC *a)
{
    TCGv_i64 t0, t1, t2;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();
    t2 = tcg_temp_new_i64();

    /* t2 = difference of the high doublewords */
    get_avr64(t0, a->vra, true);
    get_avr64(t1, a->vrb, true);
    tcg_gen_xor_i64(t2, t0, t1);

    /* t1 = difference of the low doublewords */
    get_avr64(t0, a->vra, false);
    get_avr64(t1, a->vrb, false);
    tcg_gen_xor_i64(t1, t0, t1);

    /* Equal iff no bit differs; t1 = -1 when equal, 0 otherwise */
    tcg_gen_or_i64(t1, t1, t2);
    tcg_gen_negsetcond_i64(TCG_COND_EQ, t1, t1, tcg_constant_i64(0));

    set_avr64(a->vrt, t1, true);
    set_avr64(a->vrt, t1, false);

    if (a->rc) {
        /* (all ones & 0xa) ^ 0x2 == 0x8; (0 & 0xa) ^ 0x2 == 0x2 */
        tcg_gen_extrl_i64_i32(cpu_crf[6], t1);
        tcg_gen_andi_i32(cpu_crf[6], cpu_crf[6], 0xa);
        tcg_gen_xori_i32(cpu_crf[6], cpu_crf[6], 0x2);
    }
    return true;
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
static bool do_vcmpgtq(DisasContext *ctx, arg_VC *a, bool sign)
|
|
|
|
|
{
|
|
|
|
|
TCGv_i64 t0, t1, t2;
|
|
|
|
|
|
|
|
|
|
t0 = tcg_temp_new_i64();
|
|
|
|
|
t1 = tcg_temp_new_i64();
|
|
|
|
|
t2 = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
get_avr64(t0, a->vra, false);
|
|
|
|
|
get_avr64(t1, a->vrb, false);
|
2023-08-05 03:22:26 +03:00
|
|
|
|
tcg_gen_negsetcond_i64(TCG_COND_GTU, t2, t0, t1);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
get_avr64(t0, a->vra, true);
|
|
|
|
|
get_avr64(t1, a->vrb, true);
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_EQ, t2, t0, t1, t2, tcg_constant_i64(0));
|
2023-08-05 03:22:26 +03:00
|
|
|
|
tcg_gen_negsetcond_i64(sign ? TCG_COND_GT : TCG_COND_GTU, t1, t0, t1);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
tcg_gen_or_i64(t1, t1, t2);
|
|
|
|
|
|
|
|
|
|
set_avr64(a->vrt, t1, true);
|
|
|
|
|
set_avr64(a->vrt, t1, false);
|
|
|
|
|
|
|
|
|
|
if (a->rc) {
|
|
|
|
|
tcg_gen_extrl_i64_i32(cpu_crf[6], t1);
|
|
|
|
|
tcg_gen_andi_i32(cpu_crf[6], cpu_crf[6], 0xa);
|
|
|
|
|
tcg_gen_xori_i32(cpu_crf[6], cpu_crf[6], 0x2);
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VCMPGTSQ, do_vcmpgtq, true)
|
|
|
|
|
TRANS(VCMPGTUQ, do_vcmpgtq, false)
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
static bool do_vcmpq(DisasContext *ctx, arg_VX_bf *a, bool sign)
|
|
|
|
|
{
|
|
|
|
|
TCGv_i64 vra, vrb;
|
|
|
|
|
TCGLabel *gt, *lt, *done;
|
|
|
|
|
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
2023-01-30 03:45:12 +03:00
|
|
|
|
vra = tcg_temp_new_i64();
|
|
|
|
|
vrb = tcg_temp_new_i64();
|
2022-03-02 08:51:37 +03:00
|
|
|
|
gt = gen_new_label();
|
|
|
|
|
lt = gen_new_label();
|
|
|
|
|
done = gen_new_label();
|
|
|
|
|
|
|
|
|
|
get_avr64(vra, a->vra, true);
|
|
|
|
|
get_avr64(vrb, a->vrb, true);
|
|
|
|
|
tcg_gen_brcond_i64((sign ? TCG_COND_GT : TCG_COND_GTU), vra, vrb, gt);
|
|
|
|
|
tcg_gen_brcond_i64((sign ? TCG_COND_LT : TCG_COND_LTU), vra, vrb, lt);
|
|
|
|
|
|
|
|
|
|
get_avr64(vra, a->vra, false);
|
|
|
|
|
get_avr64(vrb, a->vrb, false);
|
|
|
|
|
tcg_gen_brcond_i64(TCG_COND_GTU, vra, vrb, gt);
|
|
|
|
|
tcg_gen_brcond_i64(TCG_COND_LTU, vra, vrb, lt);
|
|
|
|
|
|
|
|
|
|
tcg_gen_movi_i32(cpu_crf[a->bf], CRF_EQ);
|
|
|
|
|
tcg_gen_br(done);
|
|
|
|
|
|
|
|
|
|
gen_set_label(gt);
|
|
|
|
|
tcg_gen_movi_i32(cpu_crf[a->bf], CRF_GT);
|
|
|
|
|
tcg_gen_br(done);
|
|
|
|
|
|
|
|
|
|
gen_set_label(lt);
|
|
|
|
|
tcg_gen_movi_i32(cpu_crf[a->bf], CRF_LT);
|
|
|
|
|
tcg_gen_br(done);
|
|
|
|
|
|
|
|
|
|
gen_set_label(done);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VCMPSQ, do_vcmpq, true)
|
|
|
|
|
TRANS(VCMPUQ, do_vcmpq, false)
|
|
|
|
|
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/*
 * The vcmp*fp compares.  Each GEN_VXRFORM() line expands to two
 * translators: gen_<name> (calling helper <name>) and gen_<name>_
 * (calling helper <name>_dot).
 */
GEN_VXRFORM(vcmpeqfp, 3, 3)
GEN_VXRFORM(vcmpgefp, 3, 7)
GEN_VXRFORM(vcmpgtfp, 3, 11)
GEN_VXRFORM(vcmpbfp, 3, 15)
|
|
|
|
|
|
2020-03-29 00:58:36 +03:00
|
|
|
|
/*
 * Splat the SIMM5 immediate of the opcode into every @vece-sized element
 * of VR[rD].  Raises the VPU-unavailable exception when Altivec is
 * disabled.
 */
static void gen_vsplti(DisasContext *ctx, int vece)
{
    int imm;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    imm = SIMM5(ctx->opcode);
    tcg_gen_gvec_dup_imm(vece, avr_full_offset(rD(ctx->opcode)),
                         16, 16, imm);
}
|
|
|
|
|
|
|
|
|
|
#define GEN_VXFORM_VSPLTI(name, vece, opc2, opc3) \
|
|
|
|
|
static void glue(gen_, name)(DisasContext *ctx) { gen_vsplti(ctx, vece); }
|
|
|
|
|
|
|
|
|
|
GEN_VXFORM_VSPLTI(vspltisb, MO_8, 6, 12);
|
|
|
|
|
GEN_VXFORM_VSPLTI(vspltish, MO_16, 6, 13);
|
|
|
|
|
GEN_VXFORM_VSPLTI(vspltisw, MO_32, 6, 14);
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
|
|
|
|
/*
 * Emit gen_<name>() for a VX-form instruction without an rA operand: the
 * helper receives pointers to VR[rD] and VR[rB] only.  Raises the
 * VPU-unavailable exception when Altivec is disabled.  opc2 and opc3 are
 * not used by the expansion.
 */
#define GEN_VXFORM_NOA(name, opc2, opc3)                                \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src, dst;                                                  \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    src = gen_avr_ptr(rB(ctx->opcode));                                 \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(dst, src);                                        \
}
|
|
|
|
|
|
|
|
|
|
/*
 * Same as GEN_VXFORM_NOA, except that the helper additionally receives
 * cpu_env as its first argument.
 */
#define GEN_VXFORM_NOA_ENV(name, opc2, opc3)                            \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src, dst;                                                  \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    src = gen_avr_ptr(rB(ctx->opcode));                                 \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(cpu_env, dst, src);                               \
}
|
|
|
|
|
|
2016-09-06 08:04:08 +03:00
|
|
|
|
/*
 * Variant of GEN_VXFORM_NOA taking an extra opc4 argument; the generated
 * translator is the same (helper called with pointers to VR[rD] and
 * VR[rB]).  opc2, opc3 and opc4 are not used by the expansion.
 */
#define GEN_VXFORM_NOA_2(name, opc2, opc3, opc4)                        \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src, dst;                                                  \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    src = gen_avr_ptr(rB(ctx->opcode));                                 \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(dst, src);                                        \
}
|
|
|
|
|
|
2016-09-28 08:45:18 +03:00
|
|
|
|
/*
 * Emit gen_<name>() for an instruction whose result goes to a general
 * purpose register: the helper receives cpu_gpr[rD] and a pointer to
 * VR[rB].  Raises the VPU-unavailable exception when Altivec is disabled.
 * opc2, opc3 and opc4 are not used by the expansion.
 */
#define GEN_VXFORM_NOA_3(name, opc2, opc3, opc4)                        \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src;                                                       \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
    src = gen_avr_ptr(rB(ctx->opcode));                                 \
    gen_helper_##name(cpu_gpr[rD(ctx->opcode)], src);                   \
}
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/* vupk*: helpers are called with pointers to VR[rD] and VR[rB] only. */
GEN_VXFORM_NOA(vupkhsb, 7, 8);
GEN_VXFORM_NOA(vupkhsh, 7, 9);
GEN_VXFORM_NOA(vupkhsw, 7, 25);
GEN_VXFORM_NOA(vupklsb, 7, 10);
GEN_VXFORM_NOA(vupklsh, 7, 11);
GEN_VXFORM_NOA(vupklsw, 7, 27);
GEN_VXFORM_NOA(vupkhpx, 7, 13);
GEN_VXFORM_NOA(vupklpx, 7, 15);
/* The helpers below additionally receive cpu_env. */
GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
GEN_VXFORM_NOA_ENV(vlogefp, 5, 7);
GEN_VXFORM_NOA_ENV(vrfim, 5, 11);
GEN_VXFORM_NOA_ENV(vrfin, 5, 8);
GEN_VXFORM_NOA_ENV(vrfip, 5, 10);
GEN_VXFORM_NOA_ENV(vrfiz, 5, 9);
|
target/ppc: Move VPRTYB[WDQ] to decodetree and use gvec
Moved VPRTYBW and VPRTYBD to use gvec and both of them and VPRTYBQ to
decodetree. VPRTYBW and VPRTYBD now also use .fni4 and .fni8,
respectively.
vprtybw:
rept loop master patch
8 12500 0,01198900 0,00703100 (-41.4%)
25 4000 0,01070100 0,00571400 (-46.6%)
100 1000 0,01123300 0,00678200 (-39.6%)
500 200 0,01601500 0,01535600 (-4.1%)
2500 40 0,03872900 0,05562100 (43.6%)
8000 12 0,10047000 0,16643000 (65.7%)
vprtybd:
rept loop master patch
8 12500 0,00757700 0,00788100 (4.0%)
25 4000 0,00652500 0,00669600 (2.6%)
100 1000 0,00714400 0,00825400 (15.5%)
500 200 0,01211000 0,01903700 (57.2%)
2500 40 0,03483800 0,07021200 (101.5%)
8000 12 0,09591800 0,21036200 (119.3%)
vprtybq:
rept loop master patch
8 12500 0,00675600 0,00667200 (-1.2%)
25 4000 0,00619400 0,00643200 (3.8%)
100 1000 0,00707100 0,00751100 (6.2%)
500 200 0,01199300 0,01342000 (11.9%)
2500 40 0,03490900 0,04092900 (17.2%)
8000 12 0,09588200 0,11465100 (19.6%)
I wasn't expecting such a performance loss in both VPRTYBD and VPRTYBQ;
I'm not sure if it's worth moving those instructions. Comparing the
assembly of the helper with the TCGop they are pretty similar, so
I'm not sure why vprtybd took so much more time.
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-6-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:33 +03:00
|
|
|
|
|
|
|
|
|
/*
 * Inline vector expansion of the byte-parity instructions: for every
 * @vece-sized element, XOR together the least significant bit of each of
 * its bytes and leave that single bit in @t.
 *
 * The element is folded onto itself with shifts of half the element
 * width, then a quarter, and so on down to 8 bits.  Note that @b is used
 * as the accumulator and is therefore modified.
 */
static void gen_vprtyb_vec(unsigned vece, TCGv_vec t, TCGv_vec b)
{
    TCGv_vec folded = tcg_temp_new_vec_matching(b);
    TCGv_vec one;
    int shift;

    /* MO_32 is 2: shifts 16, 8.  MO_64 is 3: shifts 32, 16, 8. */
    for (shift = 4 << vece; shift > 4; shift >>= 1) {
        tcg_gen_shri_vec(vece, folded, b, shift);
        tcg_gen_xor_vec(vece, b, folded, b);
    }

    /* Only bit 0 of the folded value carries the parity */
    one = tcg_constant_vec_matching(t, vece, 1);
    tcg_gen_and_vec(vece, t, b, one);
}
|
|
|
|
|
|
|
|
|
|
/* vprtybw */
|
|
|
|
|
static void gen_vprtyb_i32(TCGv_i32 t, TCGv_i32 b)
|
|
|
|
|
{
|
|
|
|
|
tcg_gen_ctpop_i32(t, b);
|
|
|
|
|
tcg_gen_and_i32(t, t, tcg_constant_i32(1));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* vprtybd */
|
|
|
|
|
static void gen_vprtyb_i64(TCGv_i64 t, TCGv_i64 b)
|
|
|
|
|
{
|
|
|
|
|
tcg_gen_ctpop_i64(t, b);
|
|
|
|
|
tcg_gen_and_i64(t, t, tcg_constant_i64(1));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool do_vx_vprtyb(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
|
|
|
|
|
{
|
|
|
|
|
static const TCGOpcode vecop_list[] = {
|
|
|
|
|
INDEX_op_shri_vec, 0
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static const GVecGen2 op[] = {
|
|
|
|
|
{
|
|
|
|
|
.fniv = gen_vprtyb_vec,
|
|
|
|
|
.fni4 = gen_vprtyb_i32,
|
|
|
|
|
.opt_opc = vecop_list,
|
|
|
|
|
.vece = MO_32
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
.fniv = gen_vprtyb_vec,
|
|
|
|
|
.fni8 = gen_vprtyb_i64,
|
|
|
|
|
.opt_opc = vecop_list,
|
|
|
|
|
.vece = MO_64
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
.fno = gen_helper_VPRTYBQ,
|
|
|
|
|
.vece = MO_128
|
|
|
|
|
},
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA300);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
tcg_gen_gvec_2(avr_full_offset(a->vrt), avr_full_offset(a->vrb),
|
|
|
|
|
16, 16, &op[vece - MO_32]);
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VPRTYBW, do_vx_vprtyb, MO_32)
|
|
|
|
|
TRANS(VPRTYBD, do_vx_vprtyb, MO_64)
|
|
|
|
|
TRANS(VPRTYBQ, do_vx_vprtyb, MO_128)
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
2019-02-15 13:00:45 +03:00
|
|
|
|
/*
 * Common translator for the vsplt{b,h,w} instructions: replicate the
 * element of VRB selected by the UIMM5 opcode field into every element of
 * VRD.  @vece is the element size expressed as a MemOp.
 *
 * Raises POWERPC_EXCP_VPU when Altivec is not enabled.
 */
static void gen_vsplt(DisasContext *ctx, int vece)
{
    int elt_byte, src_ofs, dst_ofs;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    /* Experimental testing shows that hardware masks the immediate. */
    elt_byte = (UIMM5(ctx->opcode) << vece) & 15;

    src_ofs = avr_full_offset(rB(ctx->opcode)) + elt_byte;
    dst_ofs = avr_full_offset(rD(ctx->opcode));

#if !HOST_BIG_ENDIAN
    /*
     * Flip the low four bits of the offset, then round down to the start
     * of the element.
     */
    src_ofs = (src_ofs ^ 15) & ~((1 << vece) - 1);
#endif

    tcg_gen_gvec_dup_mem(vece, dst_ofs, src_ofs, 16, 16);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Define gen_<name>() as a thin wrapper around gen_vsplt() for element
 * size @vece.  opc2/opc3 are not used by the expansion.
 */
#define GEN_VXFORM_VSPLT(name, vece, opc2, opc3)                        \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    gen_vsplt(ctx, vece);                                               \
}
|
|
|
|
|
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/*
 * Define gen_<name>() for a VX-form instruction whose helper receives
 * cpu_env, pointers to the destination (rD) and source (rB) vector
 * registers, and the UIMM5 opcode field as a constant.
 * Raises POWERPC_EXCP_VPU when Altivec is not enabled.
 * opc2/opc3 are not used by the expansion.
 */
#define GEN_VXFORM_UIMM_ENV(name, opc2, opc3)                           \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    TCGv_ptr src, dst;                                                  \
    TCGv_i32 imm;                                                       \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    imm = tcg_constant_i32(UIMM5(ctx->opcode));                         \
    src = gen_avr_ptr(rB(ctx->opcode));                                 \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(cpu_env, dst, src, imm);                          \
}
|
|
|
|
|
|
2016-09-06 08:04:06 +03:00
|
|
|
|
/*
 * Define gen_<name>() for a VX-form instruction whose helper receives
 * pointers to the destination (rD) and source (rB) vector registers plus
 * the UIMM4 opcode field.  An immediate larger than @splat_max is replaced
 * by 0 before being handed to the helper.
 * Raises POWERPC_EXCP_VPU when Altivec is not enabled.
 * opc2/opc3 are not used by the expansion.
 */
#define GEN_VXFORM_UIMM_SPLAT(name, opc2, opc3, splat_max)              \
static void glue(gen_, name)(DisasContext *ctx)                         \
{                                                                       \
    uint8_t pos = UIMM4(ctx->opcode);                                   \
    TCGv_ptr src, dst;                                                  \
    TCGv_i32 pos_tmp;                                                   \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    /* Out-of-range immediates fall back to element 0. */               \
    pos_tmp = tcg_temp_new_i32();                                       \
    tcg_gen_movi_i32(pos_tmp, pos > (splat_max) ? 0 : pos);             \
    src = gen_avr_ptr(rB(ctx->opcode));                                 \
    dst = gen_avr_ptr(rD(ctx->opcode));                                 \
    gen_helper_##name(dst, src, pos_tmp);                               \
}
|
|
|
|
|
|
2019-02-15 13:00:45 +03:00
|
|
|
|
/*
 * vsplt{b,h,w} (Altivec) and vextractu{b,h,w} (ISA 3.00) are declared with
 * the same opc2/opc3 pair, so each pair is also combined into a single
 * translator with GEN_VXFORM_DUAL.  Every DUAL must follow the definitions
 * of both of its members.
 */
GEN_VXFORM_VSPLT(vspltb, MO_8, 6, 8);
GEN_VXFORM_UIMM_SPLAT(vextractub, 6, 8, 15);
GEN_VXFORM_DUAL(vspltb, PPC_ALTIVEC, PPC_NONE,
                vextractub, PPC_NONE, PPC2_ISA300);

GEN_VXFORM_VSPLT(vsplth, MO_16, 6, 9);
GEN_VXFORM_UIMM_SPLAT(vextractuh, 6, 9, 14);
GEN_VXFORM_DUAL(vsplth, PPC_ALTIVEC, PPC_NONE,
                vextractuh, PPC_NONE, PPC2_ISA300);

GEN_VXFORM_VSPLT(vspltw, MO_32, 6, 10);
GEN_VXFORM_UIMM_SPLAT(vextractuw, 6, 10, 12);
GEN_VXFORM_DUAL(vspltw, PPC_ALTIVEC, PPC_NONE,
                vextractuw, PPC_NONE, PPC2_ISA300);

/* vextractd has no vsplt counterpart. */
GEN_VXFORM_UIMM_SPLAT(vextractd, 6, 11, 8);

/* Conversions taking a UIMM5 immediate; their helpers need cpu_env. */
GEN_VXFORM_UIMM_ENV(vcfux, 5, 12);
GEN_VXFORM_UIMM_ENV(vcfsx, 5, 13);
GEN_VXFORM_UIMM_ENV(vctuxs, 5, 14);
GEN_VXFORM_UIMM_ENV(vctsxs, 5, 15);
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * VGNB: gather every Nth bit of VRB, starting from the most significant
 * bit, into the high-order bits of GPR[RT].
 *
 * Returns true in every case where the instruction is accepted.  N < 2
 * leaves RT unchanged (see below).
 */
static bool trans_VGNB(DisasContext *ctx, arg_VX_n *a)
{
    /*
     * Similar to do_vextractm, we'll use a sequence of mask-shift-or operations
     * to gather the bits. The masks can be created with
     *
     * uint64_t mask(uint64_t n, uint64_t step)
     * {
     *     uint64_t p = ((1UL << (1UL << step)) - 1UL) << ((n - 1UL) << step),
     *                  plen = n << step, m = 0;
     *     for(int i = 0; i < 64/plen; i++) {
     *         m |= p;
     *         m = ror64(m, plen);
     *     }
     *     p >>= plen * DIV_ROUND_UP(64, plen) - 64;
     *     return m | p;
     * }
     *
     * But since there are few values of N, we'll use a lookup table to avoid
     * these calculations at runtime.
     *
     * Rows are indexed by (N - 2), columns by the gather step.
     */
    static const uint64_t mask[6][5] = {
        {
            0xAAAAAAAAAAAAAAAAULL, 0xccccccccccccccccULL, 0xf0f0f0f0f0f0f0f0ULL,
            0xff00ff00ff00ff00ULL, 0xffff0000ffff0000ULL
        },
        {
            0x9249249249249249ULL, 0xC30C30C30C30C30CULL, 0xF00F00F00F00F00FULL,
            0xFF0000FF0000FF00ULL, 0xFFFF00000000FFFFULL
        },
        {
            /* For N >= 4, some mask operations can be elided */
            0x8888888888888888ULL, 0, 0xf000f000f000f000ULL, 0,
            0xFFFF000000000000ULL
        },
        {
            0x8421084210842108ULL, 0, 0xF0000F0000F0000FULL, 0, 0
        },
        {
            0x8208208208208208ULL, 0, 0xF00000F00000F000ULL, 0, 0
        },
        {
            0x8102040810204081ULL, 0, 0xF000000F000000F0ULL, 0, 0
        }
    };
    uint64_t m;
    int i, sh, nbits;
    TCGv_i64 hi, lo, t0, t1;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    if (a->n < 2) {
        /*
         * "N can be any value between 2 and 7, inclusive." Otherwise, the
         * result is undefined, so we don't need to change RT. Also, N > 7 is
         * impossible since the immediate field is 3 bits only.
         */
        return true;
    }

    /*
     * Number of bits gathered from each doubleword.  This must only be
     * computed once N is known to be non-zero: evaluating it for N = 0
     * would divide by zero at translation time.
     */
    nbits = DIV_ROUND_UP(64, a->n);

    hi = tcg_temp_new_i64();
    lo = tcg_temp_new_i64();
    t0 = tcg_temp_new_i64();
    t1 = tcg_temp_new_i64();

    get_avr64(hi, a->vrb, true);
    get_avr64(lo, a->vrb, false);

    /* Align the lower doubleword so we can use the same mask */
    tcg_gen_shli_i64(lo, lo, a->n * nbits - 64);

    /*
     * Starting from the most significant bit, gather every Nth bit with a
     * sequence of mask-shift-or operation. E.g.: for N=3
     * AxxBxxCxxDxxExxFxxGxxHxxIxxJxxKxxLxxMxxNxxOxxPxxQxxRxxSxxTxxUxxV
     *     & rep(0b100)
     * A..B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V
     *     << 2
     * .B..C..D..E..F..G..H..I..J..K..L..M..N..O..P..Q..R..S..T..U..V..
     *     |
     * AB.BC.CD.DE.EF.FG.GH.HI.IJ.JK.KL.LM.MN.NO.OP.PQ.QR.RS.ST.TU.UV.V
     *  & rep(0b110000)
     * AB....CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV..
     *     << 4
     * ..CD....EF....GH....IJ....KL....MN....OP....QR....ST....UV......
     *     |
     * ABCD..CDEF..EFGH..GHIJ..IJKL..KLMN..MNOP..OPQR..QRST..STUV..UV..
     *     & rep(0b111100000000)
     * ABCD........EFGH........IJKL........MNOP........QRST........UV..
     *     << 8
     * ....EFGH........IJKL........MNOP........QRST........UV..........
     *     |
     * ABCDEFGH....EFGHIJKL....IJKLMNOP....MNOPQRST....QRSTUV......UV..
     *  & rep(0b111111110000000000000000)
     * ABCDEFGH................IJKLMNOP................QRSTUV..........
     *     << 16
     * ........IJKLMNOP................QRSTUV..........................
     *     |
     * ABCDEFGHIJKLMNOP........IJKLMNOPQRSTUV..........QRSTUV..........
     *     & rep(0b111111111111111100000000000000000000000000000000)
     * ABCDEFGHIJKLMNOP................................QRSTUV..........
     *     << 32
     * ................QRSTUV..........................................
     *     |
     * ABCDEFGHIJKLMNOPQRSTUV..........................QRSTUV..........
     */
    for (i = 0, sh = a->n - 1; i < 5; i++, sh <<= 1) {
        m = mask[a->n - 2][i];
        if (m) {
            tcg_gen_andi_i64(hi, hi, m);
            tcg_gen_andi_i64(lo, lo, m);
        }
        if (sh < 64) {
            tcg_gen_shli_i64(t0, hi, sh);
            tcg_gen_shli_i64(t1, lo, sh);
            tcg_gen_or_i64(hi, t0, hi);
            tcg_gen_or_i64(lo, t1, lo);
        }
    }

    /* Keep the top nbits of each half and concatenate them: hi first. */
    tcg_gen_andi_i64(hi, hi, ~(~0ULL >> nbits));
    tcg_gen_andi_i64(lo, lo, ~(~0ULL >> nbits));
    tcg_gen_shri_i64(lo, lo, nbits);
    tcg_gen_or_i64(hi, hi, lo);
    tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], hi);
    return true;
}
|
|
|
|
|
|
2021-11-04 15:37:03 +03:00
|
|
|
|
static bool do_vextdx(DisasContext *ctx, arg_VA *a, int size, bool right,
|
|
|
|
|
void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv))
|
|
|
|
|
{
|
|
|
|
|
TCGv_ptr vrt, vra, vrb;
|
|
|
|
|
TCGv rc;
|
|
|
|
|
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
vrt = gen_avr_ptr(a->vrt);
|
|
|
|
|
vra = gen_avr_ptr(a->vra);
|
|
|
|
|
vrb = gen_avr_ptr(a->vrb);
|
|
|
|
|
rc = tcg_temp_new();
|
|
|
|
|
|
|
|
|
|
tcg_gen_andi_tl(rc, cpu_gpr[a->rc], 0x1F);
|
|
|
|
|
if (right) {
|
|
|
|
|
tcg_gen_subfi_tl(rc, 32 - size, rc);
|
|
|
|
|
}
|
|
|
|
|
gen_helper(cpu_env, vrt, vra, vrb, rc);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VEXTDUBVLX, do_vextdx, 1, false, gen_helper_VEXTDUBVLX)
|
|
|
|
|
TRANS(VEXTDUHVLX, do_vextdx, 2, false, gen_helper_VEXTDUHVLX)
|
|
|
|
|
TRANS(VEXTDUWVLX, do_vextdx, 4, false, gen_helper_VEXTDUWVLX)
|
|
|
|
|
TRANS(VEXTDDVLX, do_vextdx, 8, false, gen_helper_VEXTDDVLX)
|
|
|
|
|
|
|
|
|
|
TRANS(VEXTDUBVRX, do_vextdx, 1, true, gen_helper_VEXTDUBVLX)
|
|
|
|
|
TRANS(VEXTDUHVRX, do_vextdx, 2, true, gen_helper_VEXTDUHVLX)
|
|
|
|
|
TRANS(VEXTDUWVRX, do_vextdx, 4, true, gen_helper_VEXTDUWVLX)
|
|
|
|
|
TRANS(VEXTDDVRX, do_vextdx, 8, true, gen_helper_VEXTDDVLX)
|
|
|
|
|
|
2021-11-04 15:36:59 +03:00
|
|
|
|
static bool do_vinsx(DisasContext *ctx, int vrt, int size, bool right, TCGv ra,
|
|
|
|
|
TCGv_i64 rb, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
|
|
|
|
|
{
|
|
|
|
|
TCGv_ptr t;
|
|
|
|
|
TCGv idx;
|
|
|
|
|
|
|
|
|
|
t = gen_avr_ptr(vrt);
|
|
|
|
|
idx = tcg_temp_new();
|
|
|
|
|
|
|
|
|
|
tcg_gen_andi_tl(idx, ra, 0xF);
|
|
|
|
|
if (right) {
|
|
|
|
|
tcg_gen_subfi_tl(idx, 16 - size, idx);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
gen_helper(cpu_env, t, rb, idx);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2021-11-04 15:37:01 +03:00
|
|
|
|
/*
 * Variant of do_vinsx() whose inserted value comes from a vector register:
 * the doubleword of @vrb read with get_avr64(..., true) is passed on as the
 * value to insert.
 */
static bool do_vinsvx(DisasContext *ctx, int vrt, int size, bool right, TCGv ra,
                int vrb, void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv_i64 src = tcg_temp_new_i64();

    get_avr64(src, vrb, true);

    return do_vinsx(ctx, vrt, size, right, ra, src, gen_helper);
}
|
|
|
|
|
|
2021-11-04 15:36:59 +03:00
|
|
|
|
/*
 * VX-form translator for inserts whose value comes from a GPR (the register
 * numbered by the VRB field) and whose index comes from GPR[VRA].
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool do_vinsx_VX(DisasContext *ctx, arg_VX *a, int size, bool right,
                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv_i64 src;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    /* Zero-extend the target-long GPR to the 64 bits the helper expects. */
    src = tcg_temp_new_i64();
    tcg_gen_extu_tl_i64(src, cpu_gpr[a->vrb]);

    return do_vinsx(ctx, a->vrt, size, right, cpu_gpr[a->vra], src, gen_helper);
}
|
|
|
|
|
|
2021-11-04 15:37:01 +03:00
|
|
|
|
/*
 * VX-form translator for inserts whose value comes from vector register VRB
 * and whose index comes from GPR[VRA].
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool do_vinsvx_VX(DisasContext *ctx, arg_VX *a, int size, bool right,
                         void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    TCGv index_reg;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    index_reg = cpu_gpr[a->vra];
    return do_vinsvx(ctx, a->vrt, size, right, index_reg, a->vrb, gen_helper);
}
|
|
|
|
|
|
2021-11-04 15:37:00 +03:00
|
|
|
|
/*
 * Translator for VINSW/VINSD: insert a value taken from a GPR (numbered by
 * the VRB field) at the immediate byte index UIM.
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 *
 * An index above (16 - size) is logged as a guest error and the
 * instruction is treated as a no-op.
 */
static bool do_vins_VX_uim4(DisasContext *ctx, arg_VX_uim4 *a, int size,
                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    const int max_index = 16 - size;
    TCGv_i64 src;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    if (a->uim > max_index) {
        /*
         * PowerISA v3.1 says that the resulting value is undefined in this
         * case, so just log a guest error and leave VRT unchanged. The
         * real hardware would do a partial insert, e.g. if VRT is zeroed and
         * RB is 0x12345678, executing "vinsw VRT,RB,14" results in
         * VRT = 0x0000...00001234, but we don't bother to reproduce this
         * behavior as software shouldn't rely on it.
         */
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINS* at"
            " 0x" TARGET_FMT_lx ", UIM = %d > %d\n", ctx->cia, a->uim,
            max_index);
        return true;
    }

    src = tcg_temp_new_i64();
    tcg_gen_extu_tl_i64(src, cpu_gpr[a->vrb]);

    return do_vinsx(ctx, a->vrt, size, false, tcg_constant_tl(a->uim), src,
                    gen_helper);
}
|
|
|
|
|
|
2021-11-04 15:37:02 +03:00
|
|
|
|
/*
 * Translator for VINSERT{B,H,W,D}: insert a value taken from vector
 * register VRB at the immediate byte index UIM.
 * Gated by the ISA300 flag and REQUIRE_VECTOR().
 *
 * An index above (16 - size) is logged as a guest error and the
 * instruction is treated as a no-op.
 */
static bool do_vinsert_VX_uim4(DisasContext *ctx, arg_VX_uim4 *a, int size,
                        void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv))
{
    const int max_index = 16 - size;

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    if (a->uim > max_index) {
        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for VINSERT* at"
            " 0x" TARGET_FMT_lx ", UIM = %d > %d\n", ctx->cia, a->uim,
            max_index);
        return true;
    }

    return do_vinsvx(ctx, a->vrt, size, false, tcg_constant_tl(a->uim), a->vrb,
                     gen_helper);
}
|
|
|
|
|
|
2021-11-04 15:36:59 +03:00
|
|
|
|
/*
 * All insert variants share the four left-indexed helpers
 * gen_helper_VINS{B,H,W,D}LX; the right-indexed forms only differ in the
 * `right` argument handed to the common translator.
 */

/* Value from GPR, index from GPR. */
TRANS(VINSBLX, do_vinsx_VX, 1, false, gen_helper_VINSBLX)
TRANS(VINSBRX, do_vinsx_VX, 1, true, gen_helper_VINSBLX)
TRANS(VINSHLX, do_vinsx_VX, 2, false, gen_helper_VINSHLX)
TRANS(VINSHRX, do_vinsx_VX, 2, true, gen_helper_VINSHLX)
TRANS(VINSWLX, do_vinsx_VX, 4, false, gen_helper_VINSWLX)
TRANS(VINSWRX, do_vinsx_VX, 4, true, gen_helper_VINSWLX)
TRANS(VINSDLX, do_vinsx_VX, 8, false, gen_helper_VINSDLX)
TRANS(VINSDRX, do_vinsx_VX, 8, true, gen_helper_VINSDLX)

/* Value from GPR, immediate index. */
TRANS(VINSW, do_vins_VX_uim4, 4, gen_helper_VINSWLX)
TRANS(VINSD, do_vins_VX_uim4, 8, gen_helper_VINSDLX)

/* Value from vector register, index from GPR. */
TRANS(VINSBVLX, do_vinsvx_VX, 1, false, gen_helper_VINSBLX)
TRANS(VINSBVRX, do_vinsvx_VX, 1, true, gen_helper_VINSBLX)
TRANS(VINSHVLX, do_vinsvx_VX, 2, false, gen_helper_VINSHLX)
TRANS(VINSHVRX, do_vinsvx_VX, 2, true, gen_helper_VINSHLX)
TRANS(VINSWVLX, do_vinsvx_VX, 4, false, gen_helper_VINSWLX)
TRANS(VINSWVRX, do_vinsvx_VX, 4, true, gen_helper_VINSWLX)

/* Value from vector register, immediate index. */
TRANS(VINSERTB, do_vinsert_VX_uim4, 1, gen_helper_VINSBLX)
TRANS(VINSERTH, do_vinsert_VX_uim4, 2, gen_helper_VINSHLX)
TRANS(VINSERTW, do_vinsert_VX_uim4, 4, gen_helper_VINSWLX)
TRANS(VINSERTD, do_vinsert_VX_uim4, 8, gen_helper_VINSDLX)
|
|
|
|
|
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/*
 * vsldoi: hand pointers to rD, rA and rB plus the VSH opcode field (as a
 * constant) to the out-of-line helper.
 * Raises POWERPC_EXCP_VPU when Altivec is not enabled.
 */
static void gen_vsldoi(DisasContext *ctx)
{
    TCGv_ptr src_a, src_b, dst;
    TCGv_i32 shift;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    src_a = gen_avr_ptr(rA(ctx->opcode));
    src_b = gen_avr_ptr(rB(ctx->opcode));
    dst = gen_avr_ptr(rD(ctx->opcode));
    shift = tcg_constant_i32(VSH(ctx->opcode));

    gen_helper_vsldoi(dst, src_a, src_b, shift);
}
|
|
|
|
|
|
2021-11-04 15:36:58 +03:00
|
|
|
|
/*
 * VSLDBI: shift VRA left by SH bits, filling the vacated low bits from the
 * top of the high doubleword of VRB.  SH == 0 degenerates to a plain copy
 * of VRA into VRT.
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool trans_VSLDBI(DisasContext *ctx, arg_VN *a)
{
    TCGv_i64 hi, lo, fill;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    hi = tcg_temp_new_i64();
    lo = tcg_temp_new_i64();

    get_avr64(hi, a->vra, true);
    get_avr64(lo, a->vra, false);

    if (a->sh != 0) {
        const int ofs = 64 - a->sh;

        fill = tcg_temp_new_i64();
        get_avr64(fill, a->vrb, true);

        /* Each result doubleword is a 64-bit window over two neighbours. */
        tcg_gen_extract2_i64(hi, lo, hi, ofs);
        tcg_gen_extract2_i64(lo, fill, lo, ofs);
    }

    set_avr64(a->vrt, hi, true);
    set_avr64(a->vrt, lo, false);
    return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * VSRDBI: shift VRB right by SH bits, filling the vacated high bits from
 * the bottom of the low doubleword of VRA.  SH == 0 degenerates to a plain
 * copy of VRB into VRT.
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool trans_VSRDBI(DisasContext *ctx, arg_VN *a)
{
    TCGv_i64 fill, hi, lo;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    lo = tcg_temp_new_i64();
    hi = tcg_temp_new_i64();

    get_avr64(lo, a->vrb, false);
    get_avr64(hi, a->vrb, true);

    if (a->sh != 0) {
        fill = tcg_temp_new_i64();
        get_avr64(fill, a->vra, false);

        /* Each result doubleword is a 64-bit window over two neighbours. */
        tcg_gen_extract2_i64(lo, lo, hi, a->sh);
        tcg_gen_extract2_i64(hi, hi, fill, a->sh);
    }

    set_avr64(a->vrt, lo, false);
    set_avr64(a->vrt, hi, true);
    return true;
}
|
|
|
|
|
|
2021-12-17 19:57:13 +03:00
|
|
|
|
static bool do_vexpand(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
|
|
|
|
|
{
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
tcg_gen_gvec_sari(vece, avr_full_offset(a->vrt), avr_full_offset(a->vrb),
|
|
|
|
|
(8 << vece) - 1, 16, 16);
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VEXPANDBM, do_vexpand, MO_8)
|
|
|
|
|
TRANS(VEXPANDHM, do_vexpand, MO_16)
|
|
|
|
|
TRANS(VEXPANDWM, do_vexpand, MO_32)
|
|
|
|
|
TRANS(VEXPANDDM, do_vexpand, MO_64)
|
|
|
|
|
|
|
|
|
|
/*
 * VEXPANDQM: fill both doublewords of VRT with copies of the most
 * significant bit of the high doubleword of VRB.
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool trans_VEXPANDQM(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 sign;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    sign = tcg_temp_new_i64();

    get_avr64(sign, a->vrb, true);
    /* Arithmetic shift by 63 yields all-ones or all-zeroes. */
    tcg_gen_sari_i64(sign, sign, 63);

    set_avr64(a->vrt, sign, false);
    set_avr64(a->vrt, sign, true);
    return true;
}
|
|
|
|
|
|
2021-12-17 19:57:13 +03:00
|
|
|
|
static bool do_vextractm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
|
|
|
|
|
{
|
|
|
|
|
const uint64_t elem_width = 8 << vece, elem_count_half = 8 >> vece,
|
2023-05-04 12:35:39 +03:00
|
|
|
|
mask = dup_const(vece, 1ULL << (elem_width - 1));
|
2021-12-17 19:57:13 +03:00
|
|
|
|
uint64_t i, j;
|
|
|
|
|
TCGv_i64 lo, hi, t0, t1;
|
|
|
|
|
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
hi = tcg_temp_new_i64();
|
|
|
|
|
lo = tcg_temp_new_i64();
|
|
|
|
|
t0 = tcg_temp_new_i64();
|
|
|
|
|
t1 = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
get_avr64(lo, a->vrb, false);
|
|
|
|
|
get_avr64(hi, a->vrb, true);
|
|
|
|
|
|
|
|
|
|
tcg_gen_andi_i64(lo, lo, mask);
|
|
|
|
|
tcg_gen_andi_i64(hi, hi, mask);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Gather the most significant bit of each element in the highest element
|
|
|
|
|
* element. E.g. for bytes:
|
|
|
|
|
* aXXXXXXXbXXXXXXXcXXXXXXXdXXXXXXXeXXXXXXXfXXXXXXXgXXXXXXXhXXXXXXX
|
|
|
|
|
* & dup(1 << (elem_width - 1))
|
|
|
|
|
* a0000000b0000000c0000000d0000000e0000000f0000000g0000000h0000000
|
|
|
|
|
* << 32 - 4
|
|
|
|
|
* 0000e0000000f0000000g0000000h00000000000000000000000000000000000
|
|
|
|
|
* |
|
|
|
|
|
* a000e000b000f000c000g000d000h000e0000000f0000000g0000000h0000000
|
|
|
|
|
* << 16 - 2
|
|
|
|
|
* 00c000g000d000h000e0000000f0000000g0000000h000000000000000000000
|
|
|
|
|
* |
|
|
|
|
|
* a0c0e0g0b0d0f0h0c0e0g000d0f0h000e0g00000f0h00000g0000000h0000000
|
|
|
|
|
* << 8 - 1
|
|
|
|
|
* 0b0d0f0h0c0e0g000d0f0h000e0g00000f0h00000g0000000h00000000000000
|
|
|
|
|
* |
|
|
|
|
|
* abcdefghbcdefgh0cdefgh00defgh000efgh0000fgh00000gh000000h0000000
|
|
|
|
|
*/
|
|
|
|
|
for (i = elem_count_half / 2, j = 32; i > 0; i >>= 1, j >>= 1) {
|
|
|
|
|
tcg_gen_shli_i64(t0, hi, j - i);
|
|
|
|
|
tcg_gen_shli_i64(t1, lo, j - i);
|
|
|
|
|
tcg_gen_or_i64(hi, hi, t0);
|
|
|
|
|
tcg_gen_or_i64(lo, lo, t1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
tcg_gen_shri_i64(hi, hi, 64 - elem_count_half);
|
|
|
|
|
tcg_gen_extract2_i64(lo, lo, hi, 64 - elem_count_half);
|
|
|
|
|
tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], lo);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VEXTRACTBM, do_vextractm, MO_8)
|
|
|
|
|
TRANS(VEXTRACTHM, do_vextractm, MO_16)
|
|
|
|
|
TRANS(VEXTRACTWM, do_vextractm, MO_32)
|
|
|
|
|
TRANS(VEXTRACTDM, do_vextractm, MO_64)
|
|
|
|
|
|
|
|
|
|
/*
 * VEXTRACTQM: copy the most significant bit of the high doubleword of VRB
 * into bit 0 of a GPR (the register numbered by the VRT field); all other
 * bits of the GPR become zero.
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool trans_VEXTRACTQM(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 msb;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    msb = tcg_temp_new_i64();

    get_avr64(msb, a->vrb, true);
    /* Logical shift by 63 leaves only the top bit, in position 0. */
    tcg_gen_shri_i64(msb, msb, 63);
    tcg_gen_trunc_i64_tl(cpu_gpr[a->vrt], msb);
    return true;
}
|
|
|
|
|
|
2021-12-17 19:57:13 +03:00
|
|
|
|
static bool do_mtvsrm(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
|
|
|
|
|
{
|
|
|
|
|
const uint64_t elem_width = 8 << vece, elem_count_half = 8 >> vece;
|
|
|
|
|
uint64_t c;
|
|
|
|
|
int i, j;
|
|
|
|
|
TCGv_i64 hi, lo, t0, t1;
|
|
|
|
|
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
hi = tcg_temp_new_i64();
|
|
|
|
|
lo = tcg_temp_new_i64();
|
|
|
|
|
t0 = tcg_temp_new_i64();
|
|
|
|
|
t1 = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
tcg_gen_extu_tl_i64(t0, cpu_gpr[a->vrb]);
|
|
|
|
|
tcg_gen_extract_i64(hi, t0, elem_count_half, elem_count_half);
|
|
|
|
|
tcg_gen_extract_i64(lo, t0, 0, elem_count_half);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Spread the bits into their respective elements.
|
|
|
|
|
* E.g. for bytes:
|
|
|
|
|
* 00000000000000000000000000000000000000000000000000000000abcdefgh
|
|
|
|
|
* << 32 - 4
|
|
|
|
|
* 0000000000000000000000000000abcdefgh0000000000000000000000000000
|
|
|
|
|
* |
|
|
|
|
|
* 0000000000000000000000000000abcdefgh00000000000000000000abcdefgh
|
|
|
|
|
* << 16 - 2
|
|
|
|
|
* 00000000000000abcdefgh00000000000000000000abcdefgh00000000000000
|
|
|
|
|
* |
|
|
|
|
|
* 00000000000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh
|
|
|
|
|
* << 8 - 1
|
|
|
|
|
* 0000000abcdefgh000000abcdefgh000000abcdefgh000000abcdefgh0000000
|
|
|
|
|
* |
|
|
|
|
|
* 0000000abcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgXbcdefgh
|
|
|
|
|
* & dup(1)
|
|
|
|
|
* 0000000a0000000b0000000c0000000d0000000e0000000f0000000g0000000h
|
|
|
|
|
* * 0xff
|
|
|
|
|
* aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffffgggggggghhhhhhhh
|
|
|
|
|
*/
|
|
|
|
|
for (i = elem_count_half / 2, j = 32; i > 0; i >>= 1, j >>= 1) {
|
|
|
|
|
tcg_gen_shli_i64(t0, hi, j - i);
|
|
|
|
|
tcg_gen_shli_i64(t1, lo, j - i);
|
|
|
|
|
tcg_gen_or_i64(hi, hi, t0);
|
|
|
|
|
tcg_gen_or_i64(lo, lo, t1);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
c = dup_const(vece, 1);
|
|
|
|
|
tcg_gen_andi_i64(hi, hi, c);
|
|
|
|
|
tcg_gen_andi_i64(lo, lo, c);
|
|
|
|
|
|
|
|
|
|
c = MAKE_64BIT_MASK(0, elem_width);
|
|
|
|
|
tcg_gen_muli_i64(hi, hi, c);
|
|
|
|
|
tcg_gen_muli_i64(lo, lo, c);
|
|
|
|
|
|
|
|
|
|
set_avr64(a->vrt, lo, false);
|
|
|
|
|
set_avr64(a->vrt, hi, true);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(MTVSRBM, do_mtvsrm, MO_8)
|
|
|
|
|
TRANS(MTVSRHM, do_mtvsrm, MO_16)
|
|
|
|
|
TRANS(MTVSRWM, do_mtvsrm, MO_32)
|
|
|
|
|
TRANS(MTVSRDM, do_mtvsrm, MO_64)
|
|
|
|
|
|
|
|
|
|
/*
 * MTVSRQM: fill both doublewords of VRT with copies of bit 0 of a GPR (the
 * register numbered by the VRB field).
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool trans_MTVSRQM(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 fill;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    fill = tcg_temp_new_i64();

    tcg_gen_ext_tl_i64(fill, cpu_gpr[a->vrb]);
    /* Sign-extending a one-bit field gives all-ones or all-zeroes. */
    tcg_gen_sextract_i64(fill, fill, 0, 1);

    set_avr64(a->vrt, fill, false);
    set_avr64(a->vrt, fill, true);
    return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * MTVSRBMI: expand the 16-bit immediate into a byte mask in VRT, one byte
 * per immediate bit.  Everything is computed at translation time, so only
 * two constant stores are emitted.
 * Gated by the ISA310 flag and REQUIRE_VECTOR().
 */
static bool trans_MTVSRBMI(DisasContext *ctx, arg_DX_b *a)
{
    /* Successive shift distances: 32 - 4, 16 - 2, 8 - 1. */
    static const int spread_shift[] = { 28, 14, 7 };
    const uint64_t lsb_of_each_byte = dup_const(MO_8, 1);
    uint64_t hi, lo;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    hi = extract16(a->b, 8, 8);
    lo = extract16(a->b, 0, 8);

    /* Spread the eight bits of each half so that every byte sees its bit. */
    for (int k = 0; k < 3; k++) {
        hi |= hi << spread_shift[k];
        lo |= lo << spread_shift[k];
    }

    /* Isolate one bit per byte and widen it to 0x00 or 0xFF. */
    hi = (hi & lsb_of_each_byte) * 0xFF;
    lo = (lo & lsb_of_each_byte) * 0xFF;

    set_avr64(a->vrt, tcg_constant_i64(hi), true);
    set_avr64(a->vrt, tcg_constant_i64(lo), false);

    return true;
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
static bool do_vcntmb(DisasContext *ctx, arg_VX_mp *a, int vece)
|
|
|
|
|
{
|
2023-02-26 07:17:50 +03:00
|
|
|
|
TCGv_i64 r[2], mask;
|
|
|
|
|
|
|
|
|
|
r[0] = tcg_temp_new_i64();
|
|
|
|
|
r[1] = tcg_temp_new_i64();
|
2022-03-02 08:51:37 +03:00
|
|
|
|
mask = tcg_constant_i64(dup_const(vece, 1ULL << ((8 << vece) - 1)));
|
|
|
|
|
|
|
|
|
|
for (int i = 0; i < 2; i++) {
|
2023-02-26 07:17:50 +03:00
|
|
|
|
get_avr64(r[i], a->vrb, i);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
if (a->mp) {
|
2023-02-26 07:17:50 +03:00
|
|
|
|
tcg_gen_and_i64(r[i], mask, r[i]);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
} else {
|
2023-02-26 07:17:50 +03:00
|
|
|
|
tcg_gen_andc_i64(r[i], mask, r[i]);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
}
|
2023-02-26 07:17:50 +03:00
|
|
|
|
tcg_gen_ctpop_i64(r[i], r[i]);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
}
|
|
|
|
|
|
2023-02-26 07:17:50 +03:00
|
|
|
|
tcg_gen_add_i64(r[0], r[0], r[1]);
|
|
|
|
|
tcg_gen_shli_i64(r[0], r[0], TARGET_LONG_BITS - 8 + vece);
|
|
|
|
|
tcg_gen_trunc_i64_tl(cpu_gpr[a->rt], r[0]);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VCNTMBB, do_vcntmb, MO_8)
|
|
|
|
|
TRANS(VCNTMBH, do_vcntmb, MO_16)
|
|
|
|
|
TRANS(VCNTMBW, do_vcntmb, MO_32)
|
|
|
|
|
TRANS(VCNTMBD, do_vcntmb, MO_64)
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
static bool do_vstri(DisasContext *ctx, arg_VX_tb_rc *a,
|
|
|
|
|
void (*gen_helper)(TCGv_i32, TCGv_ptr, TCGv_ptr))
|
|
|
|
|
{
|
|
|
|
|
TCGv_ptr vrt, vrb;
|
|
|
|
|
|
|
|
|
|
REQUIRE_INSNS_FLAGS2(ctx, ISA310);
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
vrt = gen_avr_ptr(a->vrt);
|
|
|
|
|
vrb = gen_avr_ptr(a->vrb);
|
|
|
|
|
|
|
|
|
|
if (a->rc) {
|
|
|
|
|
gen_helper(cpu_crf[6], vrt, vrb);
|
|
|
|
|
} else {
|
|
|
|
|
TCGv_i32 discard = tcg_temp_new_i32();
|
|
|
|
|
gen_helper(discard, vrt, vrb);
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
TRANS(VSTRIBL, do_vstri, gen_helper_VSTRIBL)
|
|
|
|
|
TRANS(VSTRIBR, do_vstri, gen_helper_VSTRIBR)
|
|
|
|
|
TRANS(VSTRIHL, do_vstri, gen_helper_VSTRIHL)
|
|
|
|
|
TRANS(VSTRIHR, do_vstri, gen_helper_VSTRIHR)
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
static bool do_vclrb(DisasContext *ctx, arg_VX *a, bool right)
|
2022-03-02 08:51:37 +03:00
|
|
|
|
{
|
|
|
|
|
TCGv_i64 rb, mh, ml, tmp,
|
|
|
|
|
ones = tcg_constant_i64(-1),
|
|
|
|
|
zero = tcg_constant_i64(0);
|
|
|
|
|
|
|
|
|
|
rb = tcg_temp_new_i64();
|
|
|
|
|
mh = tcg_temp_new_i64();
|
|
|
|
|
ml = tcg_temp_new_i64();
|
|
|
|
|
tmp = tcg_temp_new_i64();
|
|
|
|
|
|
|
|
|
|
tcg_gen_extu_tl_i64(rb, cpu_gpr[a->vrb]);
|
|
|
|
|
tcg_gen_andi_i64(tmp, rb, 7);
|
|
|
|
|
tcg_gen_shli_i64(tmp, tmp, 3);
|
2022-03-02 08:51:37 +03:00
|
|
|
|
if (right) {
|
|
|
|
|
tcg_gen_shr_i64(tmp, ones, tmp);
|
|
|
|
|
} else {
|
|
|
|
|
tcg_gen_shl_i64(tmp, ones, tmp);
|
|
|
|
|
}
|
2022-03-02 08:51:37 +03:00
|
|
|
|
tcg_gen_not_i64(tmp, tmp);
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
if (right) {
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(8),
|
|
|
|
|
tmp, ones);
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(8),
|
|
|
|
|
zero, tmp);
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(16),
|
|
|
|
|
ml, ones);
|
|
|
|
|
} else {
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_LTU, ml, rb, tcg_constant_i64(8),
|
|
|
|
|
tmp, ones);
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(8),
|
|
|
|
|
zero, tmp);
|
|
|
|
|
tcg_gen_movcond_i64(TCG_COND_LTU, mh, rb, tcg_constant_i64(16),
|
|
|
|
|
mh, ones);
|
|
|
|
|
}
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
get_avr64(tmp, a->vra, true);
|
|
|
|
|
tcg_gen_and_i64(tmp, tmp, mh);
|
|
|
|
|
set_avr64(a->vrt, tmp, true);
|
|
|
|
|
|
|
|
|
|
get_avr64(tmp, a->vra, false);
|
|
|
|
|
tcg_gen_and_i64(tmp, tmp, ml);
|
|
|
|
|
set_avr64(a->vrt, tmp, false);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
TRANS(VCLRLB, do_vclrb, false)
|
|
|
|
|
TRANS(VCLRRB, do_vclrb, true)
|
|
|
|
|
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/*
 * Define gen_<name0>_<name1>() for two VA-form instructions that are told
 * apart by the Rc opcode bit: Rc clear calls the <name0> helper, Rc set
 * calls the <name1> helper.  Both helpers receive cpu_env and pointers to
 * rD, rA, rB and rC.
 * Raises POWERPC_EXCP_VPU when Altivec is not enabled.
 * opc2 is not used by the expansion.
 */
#define GEN_VAFORM_PAIRED(name0, name1, opc2)                           \
static void glue(gen_, name0##_##name1)(DisasContext *ctx)              \
{                                                                       \
    TCGv_ptr va, vb, vc, vt;                                            \
                                                                        \
    if (unlikely(!ctx->altivec_enabled)) {                              \
        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
        return;                                                         \
    }                                                                   \
                                                                        \
    va = gen_avr_ptr(rA(ctx->opcode));                                  \
    vb = gen_avr_ptr(rB(ctx->opcode));                                  \
    vc = gen_avr_ptr(rC(ctx->opcode));                                  \
    vt = gen_avr_ptr(rD(ctx->opcode));                                  \
                                                                        \
    if (!Rc(ctx->opcode)) {                                             \
        gen_helper_##name0(cpu_env, vt, va, vb, vc);                    \
    } else {                                                            \
        gen_helper_##name1(cpu_env, vt, va, vb, vc);                    \
    }                                                                   \
}
|
|
|
|
|
|
target/ppc: Move VMH[R]ADDSHS instruction to decodetree
This patch moves VMHADDSHS and VMHRADDSHS to decodetree. I couldn't find
a satisfactory implementation with TCG inline.
vmhaddshs:
rept loop master patch
8 12500 0,02983400 0,02648500 (-11.2%)
25 4000 0,02946000 0,02518000 (-14.5%)
100 1000 0,03104300 0,02638000 (-15.0%)
500 200 0,04002000 0,03502500 (-12.5%)
2500 40 0,08090100 0,07562200 (-6.5%)
8000 12 0,19242600 0,18626800 (-3.2%)
vmhraddshs:
rept loop master patch
8 12500 0,03078600 0,02851000 (-7.4%)
25 4000 0,02793200 0,02746900 (-1.7%)
100 1000 0,02886000 0,02839900 (-1.6%)
500 200 0,03714700 0,03799200 (+2.3%)
2500 40 0,07948000 0,07852200 (-1.2%)
8000 12 0,19049800 0,18813900 (-1.2%)
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-3-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:30 +03:00
|
|
|
|
/* Defines gen_vmaddfp_vnmsubfp(): the Rc opcode bit selects vnmsubfp over vmaddfp. */
GEN_VAFORM_PAIRED(vmaddfp, vnmsubfp, 23)
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
2022-05-17 15:39:26 +03:00
|
|
|
|
/*
 * Shared translator for VA-form instructions whose helper takes only
 * register pointers (no env argument).
 *
 * @ctx: translation context
 * @a: decoded operands; the vrt, vra, vrb and rc fields are read
 * @gen_helper: emitter, called as gen_helper(vrt, vra, vrb, rc pointers)
 *
 * Returns true after the helper call has been emitted.
 * NOTE(review): REQUIRE_VECTOR() is defined outside this chunk; it
 * presumably returns early when the vector facility is unavailable.
 */
static bool do_va_helper(DisasContext *ctx, arg_VA *a,
    void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
    TCGv_ptr dst, src_a, src_b, src_c;

    REQUIRE_VECTOR(ctx);

    dst = gen_avr_ptr(a->vrt);
    src_a = gen_avr_ptr(a->vra);
    src_b = gen_avr_ptr(a->vrb);
    src_c = gen_avr_ptr(a->rc);

    gen_helper(dst, src_a, src_b, src_c);
    return true;
}
|
|
|
|
|
|
2022-06-06 18:00:33 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VADDECUQ, do_va_helper, gen_helper_VADDECUQ)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VADDEUQM, do_va_helper, gen_helper_VADDEUQM)
|
|
|
|
|
|
2022-06-06 18:00:36 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VSUBEUQM, do_va_helper, gen_helper_VSUBEUQM)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VSUBECUQ, do_va_helper, gen_helper_VSUBECUQ)
|
|
|
|
|
|
2022-05-17 15:39:26 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VPERM, do_va_helper, gen_helper_VPERM)
|
|
|
|
|
TRANS_FLAGS2(ISA300, VPERMR, do_va_helper, gen_helper_VPERMR)
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
target/ppc: Moved VMLADDUHM to decodetree and use gvec
This patch moves VMLADDUHM to decodetree and creates a gvec implementation
using mul_vec and add_vec.
rept loop master patch
8 12500 0,01810500 0,00903100 (-50.1%)
25 4000 0,01739400 0,00747700 (-57.0%)
100 1000 0,01843600 0,00901400 (-51.1%)
500 200 0,02574600 0,01971000 (-23.4%)
2500 40 0,05921600 0,07121800 (+20.3%)
8000 12 0,15326700 0,21725200 (+41.7%)
The significant difference in performance when REPT is low and LOOP is
high I think is due to the fact that the new implementation has a higher
translation time, as when using a helper only 5 TCGop are used but with
the patch a total of 10 TCGop are needed (Power lacks a direct mul_vec
equivalent so this instruction is implemented with the help of 5 others,
vmuleu, vmulou, vmrgh, vmrgl and vpkum).
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-2-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:29 +03:00
|
|
|
|
/*
 * Vector expansion used for VMLADDUHM: t = a * b + c, element-wise with
 * element size @vece.  The product is written to @t first and @c is then
 * added in place.
 */
static void gen_vmladduhm_vec(unsigned vece, TCGv_vec t,
                              TCGv_vec a, TCGv_vec b, TCGv_vec c)
{
    /* t = a * b */
    tcg_gen_mul_vec(vece, t, a, b);
    /* t += c */
    tcg_gen_add_vec(vece, t, t, c);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Translate VMLADDUHM through the gvec framework with 16-bit elements.
 * gen_vmladduhm_vec() is registered as the vector expansion (.fniv) and
 * gen_helper_VMLADDUHM as the out-of-line expansion (.fno).
 *
 * Returns true after the expansion has been emitted.
 */
static bool trans_VMLADDUHM(DisasContext *ctx, arg_VA *a)
{
    /* Vector opcodes used by the .fniv expansion; zero-terminated. */
    static const TCGOpcode mladd_vec_ops[] = {
        INDEX_op_add_vec, INDEX_op_mul_vec, 0
    };
    static const GVecGen4 mladd_gen = {
        .vece = MO_16,
        .opt_opc = mladd_vec_ops,
        .fniv = gen_vmladduhm_vec,
        .fno = gen_helper_VMLADDUHM
    };

    REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
    REQUIRE_VECTOR(ctx);

    /*
     * Destination vrt, sources vra, vrb, rc.  The two 16s are presumably
     * the operation and maximum sizes in bytes -- TODO confirm against
     * the tcg_gen_gvec_4() prototype.
     */
    tcg_gen_gvec_4(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   avr_full_offset(a->rc),
                   16, 16, &mladd_gen);
    return true;
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Translate VSEL as a single gvec bit-select with 64-bit elements.
 *
 * Note the operand order handed to tcg_gen_gvec_bitsel(): after the
 * destination (vrt) come rc, then vrb, then vra.
 * NOTE(review): presumably rc is the mask choosing vrb where set and vra
 * where clear -- confirm against the tcg_gen_gvec_bitsel() contract.
 */
static bool trans_VSEL(DisasContext *ctx, arg_VA *a)
{
    REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_bitsel(MO_64,
                        avr_full_offset(a->vrt),
                        avr_full_offset(a->rc),
                        avr_full_offset(a->vrb),
                        avr_full_offset(a->vra),
                        16, 16);
    return true;
}
|
|
|
|
|
|
2022-05-17 15:39:27 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMSUMUBM, do_va_helper, gen_helper_VMSUMUBM)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMSUMMBM, do_va_helper, gen_helper_VMSUMMBM)
|
2022-05-17 15:39:29 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMSUMSHM, do_va_helper, gen_helper_VMSUMSHM)
|
2022-05-17 15:39:28 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMSUMUHM, do_va_helper, gen_helper_VMSUMUHM)
|
|
|
|
|
|
|
|
|
|
/*
 * Shared translator for VA-form instructions whose helper also needs the
 * CPU environment pointer.
 *
 * @ctx: translation context
 * @a: decoded operands; the vrt, vra, vrb and rc fields are read
 * @gen_helper: emitter, called as gen_helper(cpu_env, vrt, vra, vrb, rc)
 *
 * Returns true after the helper call has been emitted.
 */
static bool do_va_env_helper(DisasContext *ctx, arg_VA *a,
    void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
    TCGv_ptr dst, src_a, src_b, src_c;

    REQUIRE_VECTOR(ctx);

    dst = gen_avr_ptr(a->vrt);
    src_a = gen_avr_ptr(a->vra);
    src_b = gen_avr_ptr(a->vrb);
    src_c = gen_avr_ptr(a->rc);

    gen_helper(cpu_env, dst, src_a, src_b, src_c);
    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMSUMUHS, do_va_env_helper, gen_helper_VMSUMUHS)
|
2022-05-17 15:39:29 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMSUMSHS, do_va_env_helper, gen_helper_VMSUMSHS)
|
2022-05-17 15:39:27 +03:00
|
|
|
|
|
target/ppc: Move VMH[R]ADDSHS instruction to decodetree
This patch moves VMHADDSHS and VMHRADDSHS to decodetree. I couldn't find
a satisfactory implementation with TCG inline.
vmhaddshs:
rept loop master patch
8 12500 0,02983400 0,02648500 (-11.2%)
25 4000 0,02946000 0,02518000 (-14.5%)
100 1000 0,03104300 0,02638000 (-15.0%)
500 200 0,04002000 0,03502500 (-12.5%)
2500 40 0,08090100 0,07562200 (-6.5%)
8000 12 0,19242600 0,18626800 (-3.2%)
vmhraddshs:
rept loop master patch
8 12500 0,03078600 0,02851000 (-7.4%)
25 4000 0,02793200 0,02746900 (-1.7%)
100 1000 0,02886000 0,02839900 (-1.6%)
500 200 0,03714700 0,03799200 (+2.3%)
2500 40 0,07948000 0,07852200 (-1.2%)
8000 12 0,19049800 0,18813900 (-1.2%)
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-3-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:30 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMHADDSHS, do_va_env_helper, gen_helper_VMHADDSHS)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMHRADDSHS, do_va_env_helper, gen_helper_VMHRADDSHS)
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
|
|
|
|
GEN_VXFORM_NOA(vclzb, 1, 28)
|
|
|
|
|
GEN_VXFORM_NOA(vclzh, 1, 29)
|
2019-07-15 17:22:52 +03:00
|
|
|
|
GEN_VXFORM_TRANS(vclzw, 1, 30)
|
2019-07-15 17:22:51 +03:00
|
|
|
|
GEN_VXFORM_TRANS(vclzd, 1, 31)
|
target/ppc: Move VNEG[WD] to decodetree and use gvec
Moved the instructions VNEGW and VNEGD to decodetree and used gvec to
decode it.
vnegw:
rept loop master patch
8 12500 0,01053200 0,00548400 (-47.9%)
25 4000 0,01030500 0,00390000 (-62.2%)
100 1000 0,01096300 0,00395400 (-63.9%)
500 200 0,01472000 0,00712300 (-51.6%)
2500 40 0,03809000 0,02147700 (-43.6%)
8000 12 0,09957100 0,06202100 (-37.7%)
vnegd:
rept loop master patch
8 12500 0,00594600 0,00543800 (-8.5%)
25 4000 0,00575200 0,00396400 (-31.1%)
100 1000 0,00676100 0,00394800 (-41.6%)
500 200 0,01149300 0,00709400 (-38.3%)
2500 40 0,03441500 0,02169600 (-37.0%)
8000 12 0,09516900 0,06337000 (-33.4%)
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-5-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:32 +03:00
|
|
|
|
|
|
|
|
|
/*
 * Shared translator for the vector negate instructions: emits a gvec
 * negate of register vrb into vrt with element size @vece.
 * Requires the ISA300 flag.  Returns true once emitted.
 */
static bool do_vneg(DisasContext *ctx, arg_VX_tb *a, unsigned vece)
{
    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_neg(vece,
                     avr_full_offset(a->vrt),
                     avr_full_offset(a->vrb),
                     16, 16);
    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS(VNEGW, do_vneg, MO_32)
|
|
|
|
|
TRANS(VNEGD, do_vneg, MO_64)
|
2022-03-02 08:51:37 +03:00
|
|
|
|
|
|
|
|
|
/*
 * 64-bit scalar expansion for the vector sign-extend instructions:
 * signed-extract the low (64 - s) bits of @b into @t.
 */
static void gen_vexts_i64(TCGv_i64 t, TCGv_i64 b, int64_t s)
{
    const int64_t field_len = 64 - s;

    tcg_gen_sextract_i64(t, b, 0, field_len);
}
|
|
|
|
|
|
|
|
|
|
/*
 * 32-bit scalar expansion for the vector sign-extend instructions:
 * signed-extract the low (32 - s) bits of @b into @t.
 */
static void gen_vexts_i32(TCGv_i32 t, TCGv_i32 b, int32_t s)
{
    const int32_t field_len = 32 - s;

    tcg_gen_sextract_i32(t, b, 0, field_len);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Vector expansion for the vector sign-extend instructions: shift each
 * element left by @s, then arithmetic-shift it right by @s, so the low
 * part of the element ends up sign-extended.
 */
static void gen_vexts_vec(unsigned vece, TCGv_vec t, TCGv_vec b, int64_t s)
{
    /* Move the narrow field's sign bit to the top of the element... */
    tcg_gen_shli_vec(vece, t, b, s);
    /* ...and bring it back down, replicating the sign. */
    tcg_gen_sari_vec(vece, t, t, s);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Shared translator for the vector sign-extend instructions.
 *
 * @vece: destination element size; only MO_32 and MO_64 have a table
 *        entry (the table is indexed by vece - MO_32)
 * @s:    shift count handed to the expansion as the immediate
 *
 * Requires the ISA300 flag.  Returns true once emitted.
 */
static bool do_vexts(DisasContext *ctx, arg_VX_tb *a, unsigned vece, int64_t s)
{
    /* Vector opcodes used by gen_vexts_vec(); zero-terminated. */
    static const TCGOpcode vexts_vec_ops[] = {
        INDEX_op_shli_vec, INDEX_op_sari_vec, 0
    };
    /* [0] handles MO_32 elements, [1] handles MO_64 elements. */
    static const GVecGen2i vexts_gen[2] = {
        {
            .vece = MO_32,
            .opt_opc = vexts_vec_ops,
            .fniv = gen_vexts_vec,
            .fni4 = gen_vexts_i32
        },
        {
            .vece = MO_64,
            .opt_opc = vexts_vec_ops,
            .fniv = gen_vexts_vec,
            .fni8 = gen_vexts_i64
        },
    };
    const GVecGen2i *expander;

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    expander = &vexts_gen[vece - MO_32];
    tcg_gen_gvec_2i(avr_full_offset(a->vrt),
                    avr_full_offset(a->vrb),
                    16, 16, s, expander);
    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS(VEXTSB2W, do_vexts, MO_32, 24);
|
|
|
|
|
TRANS(VEXTSH2W, do_vexts, MO_32, 16);
|
|
|
|
|
TRANS(VEXTSB2D, do_vexts, MO_64, 56);
|
|
|
|
|
TRANS(VEXTSH2D, do_vexts, MO_64, 48);
|
|
|
|
|
TRANS(VEXTSW2D, do_vexts, MO_64, 32);
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Translate VEXTSD2Q: sign-extend one doubleword of vrb to 128 bits.
 *
 * The doubleword read with get_avr64(..., false) is copied unchanged to
 * the same half of vrt; the other half of vrt is filled with that
 * doubleword's sign bit (arithmetic shift right by 63).
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VEXTSD2Q(DisasContext *ctx, arg_VX_tb *a)
{
    TCGv_i64 dword;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    dword = tcg_temp_new_i64();

    /* Pass the source doubleword straight through. */
    get_avr64(dword, a->vrb, false);
    set_avr64(a->vrt, dword, false);

    /* Replicate its sign bit across the other doubleword. */
    tcg_gen_sari_i64(dword, dword, 63);
    set_avr64(a->vrt, dword, true);
    return true;
}
|
|
|
|
|
|
2016-09-06 08:04:08 +03:00
|
|
|
|
GEN_VXFORM_NOA_2(vctzb, 1, 24, 28)
|
|
|
|
|
GEN_VXFORM_NOA_2(vctzh, 1, 24, 29)
|
|
|
|
|
GEN_VXFORM_NOA_2(vctzw, 1, 24, 30)
|
|
|
|
|
GEN_VXFORM_NOA_2(vctzd, 1, 24, 31)
|
2016-09-28 08:45:18 +03:00
|
|
|
|
GEN_VXFORM_NOA_3(vclzlsbb, 1, 24, 0)
|
|
|
|
|
GEN_VXFORM_NOA_3(vctzlsbb, 1, 24, 1)
|
2016-07-27 09:56:23 +03:00
|
|
|
|
GEN_VXFORM_NOA(vpopcntb, 1, 28)
|
|
|
|
|
GEN_VXFORM_NOA(vpopcnth, 1, 29)
|
|
|
|
|
GEN_VXFORM_NOA(vpopcntw, 1, 30)
|
|
|
|
|
GEN_VXFORM_NOA(vpopcntd, 1, 31)
|
|
|
|
|
GEN_VXFORM_DUAL(vclzb, PPC_NONE, PPC2_ALTIVEC_207, \
|
|
|
|
|
vpopcntb, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
GEN_VXFORM_DUAL(vclzh, PPC_NONE, PPC2_ALTIVEC_207, \
|
|
|
|
|
vpopcnth, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
GEN_VXFORM_DUAL(vclzw, PPC_NONE, PPC2_ALTIVEC_207, \
|
|
|
|
|
vpopcntw, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
GEN_VXFORM_DUAL(vclzd, PPC_NONE, PPC2_ALTIVEC_207, \
|
|
|
|
|
vpopcntd, PPC_NONE, PPC2_ALTIVEC_207)
|
2016-09-06 08:04:09 +03:00
|
|
|
|
GEN_VXFORM(vbpermd, 6, 23);
|
2016-07-27 09:56:23 +03:00
|
|
|
|
GEN_VXFORM(vbpermq, 6, 21);
|
target/ppc: Optimize emulation of vgbbd instruction
Optimize altivec instruction vgbbd (Vector Gather Bits by Bytes by Doubleword)
All ith bits (i in range 1 to 8) of each byte of doubleword element in
source register are concatenated and placed into ith byte of appropriate
doubleword element in destination register.
Following solution is done for both doubleword elements of source register
in parallel, in order to reduce the number of instructions needed(that's why
arrays are used):
First, both doubleword elements of source register vB are placed in
appropriate element of array avr. Bits are gathered in 2x8 iterations(2 for
loops). In first iteration bit 1 of byte 1, bit 2 of byte 2,... bit 8 of
byte 8 are in their final spots so avr[i], i={0,1} can be and-ed with
tcg_mask. For every following iteration, both avr[i] and tcg_mask variables
have to be shifted right for 7 and 8 places, respectively, in order to get
bit 1 of byte 2, bit 2 of byte 3.. bit 7 of byte 8 in their final spots so
shifted avr values(saved in tmp) can be and-ed with new value of tcg_mask...
After the first 8 iterations (first loop), all the first bits are in their final
places, all second bits except the second bit of the eighth byte are in their
places, and so on (only one eighth bit, that of the eighth byte, is in its place). In second loop we do all
operations symmetrically, in order to get other half of bits in their final
spots. Results for first and second doubleword elements are saved in
result[0] and result[1] respectively. In the end those results are saved in
appropriate doubleword element of destination register vD.
Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1563200574-11098-5-git-send-email-stefan.brankovic@rt-rk.com>
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
2019-07-15 17:22:50 +03:00
|
|
|
|
GEN_VXFORM_TRANS(vgbbd, 6, 20);
|
2016-07-27 09:56:23 +03:00
|
|
|
|
GEN_VXFORM(vpmsumb, 4, 16)
|
|
|
|
|
GEN_VXFORM(vpmsumh, 4, 17)
|
|
|
|
|
GEN_VXFORM(vpmsumw, 4, 18)
|
|
|
|
|
|
|
|
|
|
/*
 * GEN_BCD(op)
 *
 * Defines gen_<op>(), the translator for a two-source BCD instruction.
 * The generated function raises POWERPC_EXCP_VPU when
 * ctx->altivec_enabled is clear.  Otherwise it calls gen_helper_<op>
 * with cpu_crf[6] as the first argument, pointers to the rD, rA and rB
 * registers, and a constant that is 1 when opcode bit 0x200 is set and
 * 0 otherwise.
 */
#define GEN_BCD(op)                                                 \
static void gen_##op(DisasContext *ctx)                             \
{                                                                   \
    TCGv_ptr src_a, src_b, dst;                                     \
    TCGv_i32 ps_flag;                                               \
                                                                    \
    if (unlikely(!ctx->altivec_enabled)) {                          \
        gen_exception(ctx, POWERPC_EXCP_VPU);                       \
        return;                                                     \
    }                                                               \
                                                                    \
    src_a = gen_avr_ptr(rA(ctx->opcode));                           \
    src_b = gen_avr_ptr(rB(ctx->opcode));                           \
    dst = gen_avr_ptr(rD(ctx->opcode));                             \
    ps_flag = tcg_constant_i32(!!(ctx->opcode & 0x200));            \
                                                                    \
    gen_helper_##op(cpu_crf[6], dst, src_a, src_b, ps_flag);        \
}
|
|
|
|
|
|
2016-11-08 19:50:22 +03:00
|
|
|
|
/*
 * GEN_BCD2(op)
 *
 * Defines gen_<op>(), the translator for a single-source BCD
 * instruction.  The generated function raises POWERPC_EXCP_VPU when
 * ctx->altivec_enabled is clear.  Otherwise it calls gen_helper_<op>
 * with cpu_crf[6] as the first argument, pointers to the rD and rB
 * registers, and a constant that is 1 when opcode bit 0x200 is set and
 * 0 otherwise.
 */
#define GEN_BCD2(op)                                                \
static void gen_##op(DisasContext *ctx)                             \
{                                                                   \
    TCGv_ptr dst, src;                                              \
    TCGv_i32 ps_flag;                                               \
                                                                    \
    if (unlikely(!ctx->altivec_enabled)) {                          \
        gen_exception(ctx, POWERPC_EXCP_VPU);                       \
        return;                                                     \
    }                                                               \
                                                                    \
    src = gen_avr_ptr(rB(ctx->opcode));                             \
    dst = gen_avr_ptr(rD(ctx->opcode));                             \
    ps_flag = tcg_constant_i32(!!(ctx->opcode & 0x200));            \
                                                                    \
    gen_helper_##op(cpu_crf[6], dst, src, ps_flag);                 \
}
|
|
|
|
|
|
2016-07-27 09:56:23 +03:00
|
|
|
|
GEN_BCD(bcdadd)
|
|
|
|
|
GEN_BCD(bcdsub)
|
2016-11-08 19:50:22 +03:00
|
|
|
|
GEN_BCD2(bcdcfn)
|
2016-11-08 19:50:23 +03:00
|
|
|
|
GEN_BCD2(bcdctn)
|
2016-11-08 19:50:24 +03:00
|
|
|
|
GEN_BCD2(bcdcfz)
|
2016-11-08 19:50:25 +03:00
|
|
|
|
GEN_BCD2(bcdctz)
|
2016-11-25 06:53:30 +03:00
|
|
|
|
GEN_BCD2(bcdcfsq)
|
2016-11-25 06:53:31 +03:00
|
|
|
|
GEN_BCD2(bcdctsq)
|
2016-11-25 06:53:33 +03:00
|
|
|
|
GEN_BCD2(bcdsetsgn)
|
2016-11-25 06:53:32 +03:00
|
|
|
|
GEN_BCD(bcdcpsgn);
|
2017-01-10 05:10:10 +03:00
|
|
|
|
GEN_BCD(bcds);
|
2017-01-10 05:10:11 +03:00
|
|
|
|
GEN_BCD(bcdus);
|
2017-01-10 05:10:12 +03:00
|
|
|
|
GEN_BCD(bcdsr);
|
2017-01-12 23:08:32 +03:00
|
|
|
|
GEN_BCD(bcdtrunc);
|
2017-01-12 23:08:33 +03:00
|
|
|
|
GEN_BCD(bcdutrunc);
|
2016-11-08 19:50:22 +03:00
|
|
|
|
|
|
|
|
|
/*
 * Secondary dispatch on opc4(ctx->opcode):
 *   0 -> bcdctsq    2 -> bcdcfsq    4 -> bcdctz    5 -> bcdctn
 *   6 -> bcdcfz     7 -> bcdcfn    31 -> bcdsetsgn
 * Every other value is handed to gen_invalid().
 */
static void gen_xpnd04_1(DisasContext *ctx)
{
    switch (opc4(ctx->opcode)) {
    case 0:
        gen_bcdctsq(ctx);
        return;
    case 2:
        gen_bcdcfsq(ctx);
        return;
    case 4:
        gen_bcdctz(ctx);
        return;
    case 5:
        gen_bcdctn(ctx);
        return;
    case 6:
        gen_bcdcfz(ctx);
        return;
    case 7:
        gen_bcdcfn(ctx);
        return;
    case 31:
        gen_bcdsetsgn(ctx);
        return;
    default:
        break;
    }

    /* No translator matched this opc4 value. */
    gen_invalid(ctx);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Secondary dispatch on opc4(ctx->opcode):
 *   0 -> bcdctsq    2 -> bcdcfsq    4 -> bcdctz
 *   6 -> bcdcfz     7 -> bcdcfn    31 -> bcdsetsgn
 * Every other value (including 5, which gen_xpnd04_1 accepts) is handed
 * to gen_invalid().
 */
static void gen_xpnd04_2(DisasContext *ctx)
{
    switch (opc4(ctx->opcode)) {
    case 0:
        gen_bcdctsq(ctx);
        return;
    case 2:
        gen_bcdcfsq(ctx);
        return;
    case 4:
        gen_bcdctz(ctx);
        return;
    case 6:
        gen_bcdcfz(ctx);
        return;
    case 7:
        gen_bcdcfn(ctx);
        return;
    case 31:
        gen_bcdsetsgn(ctx);
        return;
    default:
        break;
    }

    /* No translator matched this opc4 value. */
    gen_invalid(ctx);
}
|
|
|
|
|
|
2016-11-25 06:53:33 +03:00
|
|
|
|
|
2016-11-08 19:50:22 +03:00
|
|
|
|
GEN_VXFORM_DUAL(vsubsws, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
xpnd04_2, PPC_NONE, PPC2_ISA300)
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
|
|
|
|
GEN_VXFORM_DUAL(vsububm, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
GEN_VXFORM_DUAL(vsububs, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
bcdadd, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
GEN_VXFORM_DUAL(vsubuhm, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
GEN_VXFORM_DUAL(vsubuhs, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
bcdsub, PPC_NONE, PPC2_ALTIVEC_207)
|
2016-11-25 06:53:32 +03:00
|
|
|
|
GEN_VXFORM_DUAL(vaddshs, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
bcdcpsgn, PPC_NONE, PPC2_ISA300)
|
2017-01-10 05:10:10 +03:00
|
|
|
|
GEN_VXFORM_DUAL(vsubudm, PPC2_ALTIVEC_207, PPC_NONE, \
|
|
|
|
|
bcds, PPC_NONE, PPC2_ISA300)
|
2017-01-10 05:10:11 +03:00
|
|
|
|
GEN_VXFORM_DUAL(vsubuwm, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
bcdus, PPC_NONE, PPC2_ISA300)
|
2017-01-12 23:08:32 +03:00
|
|
|
|
GEN_VXFORM_DUAL(vsubsbs, PPC_ALTIVEC, PPC_NONE, \
|
|
|
|
|
bcdtrunc, PPC_NONE, PPC2_ISA300)
|
2016-07-27 09:56:23 +03:00
|
|
|
|
|
|
|
|
|
/*
 * Translate vsbox: raise POWERPC_EXCP_VPU when Altivec is disabled,
 * otherwise call gen_helper_vsbox with pointers to the registers
 * selected by the rD (destination) and rA (source) opcode fields.
 */
static void gen_vsbox(DisasContext *ctx)
{
    TCGv_ptr src, dst;

    if (unlikely(!ctx->altivec_enabled)) {
        gen_exception(ctx, POWERPC_EXCP_VPU);
        return;
    }

    src = gen_avr_ptr(rA(ctx->opcode));
    dst = gen_avr_ptr(rD(ctx->opcode));
    gen_helper_vsbox(dst, src);
}
|
|
|
|
|
|
|
|
|
|
GEN_VXFORM(vcipher, 4, 20)
|
|
|
|
|
GEN_VXFORM(vcipherlast, 4, 20)
|
|
|
|
|
GEN_VXFORM(vncipher, 4, 21)
|
|
|
|
|
GEN_VXFORM(vncipherlast, 4, 21)
|
|
|
|
|
|
|
|
|
|
GEN_VXFORM_DUAL(vcipher, PPC_NONE, PPC2_ALTIVEC_207,
|
|
|
|
|
vcipherlast, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
GEN_VXFORM_DUAL(vncipher, PPC_NONE, PPC2_ALTIVEC_207,
|
|
|
|
|
vncipherlast, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
|
|
|
|
|
/*
 * VSHASIGMA(op)
 *
 * Defines gen_<op>().  The generated function raises POWERPC_EXCP_VPU
 * when ctx->altivec_enabled is clear.  Otherwise it calls
 * gen_helper_<op> with pointers to the rD and rA registers and the raw
 * rB opcode field as a constant immediate (not as a register number).
 */
#define VSHASIGMA(op)                                   \
static void gen_##op(DisasContext *ctx)                 \
{                                                       \
    TCGv_ptr src, dst;                                  \
    TCGv_i32 six_imm;                                   \
                                                        \
    if (unlikely(!ctx->altivec_enabled)) {              \
        gen_exception(ctx, POWERPC_EXCP_VPU);           \
        return;                                         \
    }                                                   \
                                                        \
    src = gen_avr_ptr(rA(ctx->opcode));                 \
    dst = gen_avr_ptr(rD(ctx->opcode));                 \
    six_imm = tcg_constant_i32(rB(ctx->opcode));        \
    gen_helper_##op(dst, src, six_imm);                 \
}
|
|
|
|
|
|
|
|
|
|
VSHASIGMA(vshasigmaw)
|
|
|
|
|
VSHASIGMA(vshasigmad)
|
|
|
|
|
|
|
|
|
|
GEN_VXFORM3(vpermxor, 22, 0xFF)
|
|
|
|
|
GEN_VXFORM_DUAL(vsldoi, PPC_ALTIVEC, PPC_NONE,
|
|
|
|
|
vpermxor, PPC_NONE, PPC2_ALTIVEC_207)
|
|
|
|
|
|
2021-11-04 15:36:55 +03:00
|
|
|
|
/*
 * Translate VCFUGED: apply gen_helper_CFUGED to each 64-bit element of
 * vra and vrb through the gvec framework, writing vrt.
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VCFUGED(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3 cfuged_gen = {
        .vece = MO_64,
        .fni8 = gen_helper_CFUGED,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   16, 16, &cfuged_gen);
    return true;
}
|
|
|
|
|
|
2021-11-04 15:36:56 +03:00
|
|
|
|
/*
 * Translate VCLZDM: apply do_cntzdm to each 64-bit element of vra and
 * vrb through the gvec framework, writing vrt.  The immediate handed to
 * do_cntzdm is false here (VCTZDM passes true).
 * NOTE(review): presumably the immediate selects leading- versus
 * trailing-zero counting -- confirm in do_cntzdm.
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VCLZDM(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3i cntzdm_gen = {
        .vece = MO_64,
        .fni8 = do_cntzdm,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3i(avr_full_offset(a->vrt),
                    avr_full_offset(a->vra),
                    avr_full_offset(a->vrb),
                    16, 16, false, &cntzdm_gen);
    return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Translate VCTZDM: apply do_cntzdm to each 64-bit element of vra and
 * vrb through the gvec framework, writing vrt.  The immediate handed to
 * do_cntzdm is true here (VCLZDM passes false).
 * NOTE(review): presumably the immediate selects trailing- versus
 * leading-zero counting -- confirm in do_cntzdm.
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VCTZDM(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3i cntzdm_gen = {
        .vece = MO_64,
        .fni8 = do_cntzdm,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3i(avr_full_offset(a->vrt),
                    avr_full_offset(a->vra),
                    avr_full_offset(a->vrb),
                    16, 16, true, &cntzdm_gen);
    return true;
}
|
|
|
|
|
|
2021-11-04 15:36:57 +03:00
|
|
|
|
/*
 * Translate VPDEPD: apply gen_helper_PDEPD to each 64-bit element of
 * vra and vrb through the gvec framework, writing vrt.
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VPDEPD(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3 pdepd_gen = {
        .vece = MO_64,
        .fni8 = gen_helper_PDEPD,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   16, 16, &pdepd_gen);
    return true;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Translate VPEXTD: apply gen_helper_PEXTD to each 64-bit element of
 * vra and vrb through the gvec framework, writing vrt.
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VPEXTD(DisasContext *ctx, arg_VX *a)
{
    static const GVecGen3 pextd_gen = {
        .vece = MO_64,
        .fni8 = gen_helper_PEXTD,
    };

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   16, 16, &pextd_gen);
    return true;
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Translate VMSUMUDM inline.
 *
 * A 128-bit accumulator (sum_hi:sum_lo) is loaded from rc.  For each of
 * the two doublewords, the unsigned 64x64->128 product of the matching
 * vra and vrb doublewords is added to it.  The accumulator is then
 * stored to vrt.
 * Requires the ISA300 flag.  Returns true once emitted.
 */
static bool trans_VMSUMUDM(DisasContext *ctx, arg_VA *a)
{
    TCGv_i64 sum_lo, sum_hi, lo, hi;

    REQUIRE_INSNS_FLAGS2(ctx, ISA300);
    REQUIRE_VECTOR(ctx);

    sum_hi = tcg_temp_new_i64();
    sum_lo = tcg_temp_new_i64();
    lo = tcg_temp_new_i64();
    hi = tcg_temp_new_i64();

    /* Seed the accumulator with the rc operand. */
    get_avr64(sum_lo, a->rc, false);
    get_avr64(sum_hi, a->rc, true);

    for (int dw = 0; dw < 2; dw++) {
        /* lo/hi first hold the two factors, then the product halves. */
        get_avr64(lo, a->vra, dw);
        get_avr64(hi, a->vrb, dw);
        tcg_gen_mulu2_i64(lo, hi, lo, hi);
        tcg_gen_add2_i64(sum_lo, sum_hi, sum_lo, sum_hi, lo, hi);
    }

    set_avr64(a->vrt, sum_lo, false);
    set_avr64(a->vrt, sum_hi, true);
    return true;
}
|
|
|
|
|
|
2022-03-02 08:51:37 +03:00
|
|
|
|
/*
 * Translate VMSUMCUD inline: compute the carry out of bit 128 of
 * (vra.dw[1] * vrb.dw[1]) + (vra.dw[0] * vrb.dw[0]) + rc, where the
 * products are unsigned 64x64->128.  The carry is stored to one
 * doubleword of vrt and the other doubleword is zeroed.
 *
 * The sum is evaluated 64 bits at a time.  After each stage only the
 * carry into the next 64-bit column is kept.
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VMSUMCUD(DisasContext *ctx, arg_VA *a)
{
    /* carry/acc also serve as scratch for the multiplicands. */
    TCGv_i64 carry, acc, p1_hi, p1_lo, p0_hi, p0_lo, zero;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    carry = tcg_temp_new_i64();
    acc = tcg_temp_new_i64();
    p1_hi = tcg_temp_new_i64();
    p1_lo = tcg_temp_new_i64();
    p0_hi = tcg_temp_new_i64();
    p0_lo = tcg_temp_new_i64();
    zero = tcg_constant_i64(0);

    /* p1 = vsr[vra+32].dw[1] * vsr[vrb+32].dw[1] */
    get_avr64(carry, a->vra, false);
    get_avr64(acc, a->vrb, false);
    tcg_gen_mulu2_i64(p1_lo, p1_hi, carry, acc);

    /* p0 = vsr[vra+32].dw[0] * vsr[vrb+32].dw[0] */
    get_avr64(carry, a->vra, true);
    get_avr64(acc, a->vrb, true);
    tcg_gen_mulu2_i64(p0_lo, p0_hi, carry, acc);

    /* Low column: rc.low + p1_lo + p0_lo, carries collect in 'carry'. */
    get_avr64(acc, a->rc, false);
    tcg_gen_add2_i64(acc, carry, acc, zero, p1_lo, zero);
    tcg_gen_add2_i64(acc, carry, acc, carry, p0_lo, zero);

    /*
     * Drop the low 64 bits and keep only the carry into bit 64, then
     * add the high column: carry + rc.high + p1_hi + p0_hi.
     */
    get_avr64(acc, a->rc, true);
    tcg_gen_add2_i64(acc, carry, carry, zero, acc, zero);
    tcg_gen_add2_i64(acc, carry, acc, carry, p1_hi, zero);
    tcg_gen_add2_i64(acc, carry, acc, carry, p0_hi, zero);

    /* Drop 64 more bits to complete the CHOP128(temp >> 128). */
    set_avr64(a->vrt, carry, false);
    set_avr64(a->vrt, zero, true);
    return true;
}
|
|
|
|
|
|
target/ppc: moved vector even and odd multiplication to decodetree
Moved the instructions vmulesb, vmulosb, vmuleub, vmuloub,
vmulesh, vmulosh, vmuleuh, vmulouh, vmulesw, vmulosw,
vmuleuw and vmulouw from legacy to decodetree. Implemented
the instructions vmulesd, vmulosd, vmuleud, vmuloud.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
2022-03-02 08:51:36 +03:00
|
|
|
|
/*
 * Shared translator for VX-form instructions implemented by an
 * out-of-line helper that takes only register pointers.
 *
 * @ctx: translation context
 * @a: decoded operands; the vra, vrb and vrt fields are read
 * @gen_helper: emitter, called as gen_helper(vrt, vra, vrb pointers)
 *
 * Returns true after the helper call has been emitted.
 */
static bool do_vx_helper(DisasContext *ctx, arg_VX *a,
                         void (*gen_helper)(TCGv_ptr, TCGv_ptr, TCGv_ptr))
{
    TCGv_ptr src_a, src_b, dst;

    REQUIRE_VECTOR(ctx);

    src_a = gen_avr_ptr(a->vra);
    src_b = gen_avr_ptr(a->vrb);
    dst = gen_avr_ptr(a->vrt);

    gen_helper(dst, src_a, src_b);
    return true;
}
|
|
|
|
|
|
2022-06-06 18:00:34 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VADDCUQ, do_vx_helper, gen_helper_VADDCUQ)
|
2022-06-06 18:00:32 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VADDUQM, do_vx_helper, gen_helper_VADDUQM)
|
|
|
|
|
|
2022-06-06 18:00:31 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VPMSUMD, do_vx_helper, gen_helper_VPMSUMD)
|
|
|
|
|
|
2022-06-06 18:00:37 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VSUBCUQ, do_vx_helper, gen_helper_VSUBCUQ)
|
2022-06-06 18:00:35 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VSUBUQM, do_vx_helper, gen_helper_VSUBUQM)
|
|
|
|
|
|
target/ppc: Move V(ADD|SUB)CUW to decodetree and use gvec
This patch moves VADDCUW and VSUBCUW to decodetree with gvec using an
implementation based on the helper, with the main difference being
changing the -1 (aka all bits set to 1) result returned by cmp when
true to +1. It also implemented a .fni4 version of those instructions
and dropped the helper.
vaddcuw:
rept loop master patch
8 12500 0,01008200 0,00612400 (-39.3%)
25 4000 0,01091500 0,00471600 (-56.8%)
100 1000 0,01332500 0,00593700 (-55.4%)
500 200 0,01998500 0,01275700 (-36.2%)
2500 40 0,04704300 0,04364300 (-7.2%)
8000 12 0,10748200 0,11241000 (+4.6%)
vsubcuw:
rept loop master patch
8 12500 0,01226200 0,00571600 (-53.4%)
25 4000 0,01493500 0,00462100 (-69.1%)
100 1000 0,01522700 0,00455100 (-70.1%)
500 200 0,02384600 0,01133500 (-52.5%)
2500 40 0,04935200 0,03178100 (-35.6%)
8000 12 0,09039900 0,09440600 (+4.4%)
Overall there was a gain in performance, but the TCGop code was still
slightly bigger in the new version (it went from 4 to 5).
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-4-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:31 +03:00
|
|
|
|
/*
 * Vector expansion for VADDCUW: per element, t = 1 when a + b carries
 * out, else 0.  The carry test is (~a < b) unsigned.  The compare
 * result is then ANDed with a vector of ones to leave a 0/1 value.
 *
 * Note: @a is overwritten with its complement.
 */
static void gen_VADDCUW_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec one;

    tcg_gen_not_vec(vece, a, a);
    tcg_gen_cmp_vec(TCG_COND_LTU, vece, t, a, b);

    one = tcg_constant_vec_matching(t, vece, 1);
    tcg_gen_and_vec(vece, t, t, one);
}
|
|
|
|
|
|
|
|
|
|
/*
 * 32-bit scalar expansion for VADDCUW: t = (~a < b) unsigned, i.e. the
 * carry out of a + b.
 *
 * Note: @a is overwritten with its complement.
 */
static void gen_VADDCUW_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    /* a + b carries exactly when b exceeds the headroom ~a. */
    tcg_gen_not_i32(a, a);
    tcg_gen_setcond_i32(TCG_COND_LTU, t, a, b);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Vector expansion for VSUBCUW: per element, t = 1 when a >= b
 * (unsigned), else 0.  The compare result is ANDed with a vector of
 * ones to leave a 0/1 value.
 */
static void gen_VSUBCUW_vec(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    TCGv_vec one;

    tcg_gen_cmp_vec(TCG_COND_GEU, vece, t, a, b);

    one = tcg_constant_vec_matching(t, vece, 1);
    tcg_gen_and_vec(vece, t, t, one);
}
|
|
|
|
|
|
|
|
|
|
/*
 * 32-bit scalar expansion for VSUBCUW: t = (a >= b) unsigned.
 */
static void gen_VSUBCUW_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    /* setcond already yields 0 or 1, so no masking is needed here. */
    tcg_gen_setcond_i32(TCG_COND_GEU, t, a, b);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Shared translator for VSUBCUW and VADDCUW using 32-bit gvec elements.
 *
 * @add: index into the expansion table -- 0 selects the VSUBCUW
 *       expansion, 1 selects the VADDCUW expansion.
 *
 * Requires the ALTIVEC flag.  Returns true once emitted.
 */
static bool do_vx_vaddsubcuw(DisasContext *ctx, arg_VX *a, int add)
{
    /* Optional vector opcode used by both expansions; zero-terminated. */
    static const TCGOpcode carry_vec_ops[] = {
        INDEX_op_cmp_vec, 0
    };
    static const GVecGen3 carry_gen[] = {
        {   /* [0]: VSUBCUW */
            .vece = MO_32,
            .opt_opc = carry_vec_ops,
            .fni4 = gen_VSUBCUW_i32,
            .fniv = gen_VSUBCUW_vec
        },
        {   /* [1]: VADDCUW */
            .vece = MO_32,
            .opt_opc = carry_vec_ops,
            .fni4 = gen_VADDCUW_i32,
            .fniv = gen_VADDCUW_vec
        },
    };

    REQUIRE_INSNS_FLAGS(ctx, ALTIVEC);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_3(avr_full_offset(a->vrt),
                   avr_full_offset(a->vra),
                   avr_full_offset(a->vrb),
                   16, 16, &carry_gen[add]);
    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS(VSUBCUW, do_vx_vaddsubcuw, 0)
|
|
|
|
|
TRANS(VADDCUW, do_vx_vaddsubcuw, 1)
|
|
|
|
|
|
target/ppc: moved vector even and odd multiplication to decodetree
Moved the instructions vmulesb, vmulosb, vmuleub, vmuloub,
vmulesh, vmulosh, vmuleuh, vmulouh, vmulesw, vmulosw,
vmuleuw and vmulouw from legacy to decodetree. Implemented
the instructions vmulesd, vmulosd, vmuleud, vmuloud.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
2022-03-02 08:51:36 +03:00
|
|
|
|
/*
 * Shared translator for the doubleword even/odd multiplies.
 *
 * @even:    passed to get_avr64() to choose which doubleword of vra and
 *           vrb is multiplied
 * @gen_mul: widening multiply emitter, called as
 *           gen_mul(result_lo, result_hi, lhs, rhs)
 *
 * The two result halves are written to vrt.
 * Returns true once emitted.
 */
static bool do_vx_vmuleo(DisasContext *ctx, arg_VX *a, bool even,
                         void (*gen_mul)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
{
    TCGv_i64 lhs, rhs, prod_lo, prod_hi;

    REQUIRE_VECTOR(ctx);

    lhs = tcg_temp_new_i64();
    rhs = tcg_temp_new_i64();
    prod_lo = tcg_temp_new_i64();
    prod_hi = tcg_temp_new_i64();

    get_avr64(lhs, a->vra, even);
    get_avr64(rhs, a->vrb, even);

    gen_mul(prod_lo, prod_hi, lhs, rhs);

    set_avr64(a->vrt, prod_lo, false);
    set_avr64(a->vrt, prod_hi, true);
    return true;
}
|
|
|
|
|
|
2022-03-02 08:51:36 +03:00
|
|
|
|
/*
 * Translate VMULLD as a gvec multiply over 64-bit elements:
 * vrt = vra * vrb.
 * Requires the ISA310 flag.  Returns true once emitted.
 */
static bool trans_VMULLD(DisasContext *ctx, arg_VX *a)
{
    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    tcg_gen_gvec_mul(MO_64,
                     avr_full_offset(a->vrt),
                     avr_full_offset(a->vra),
                     avr_full_offset(a->vrb),
                     16, 16);
    return true;
}
|
|
|
|
|
|
2022-03-05 09:16:47 +03:00
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULESB, do_vx_helper, gen_helper_VMULESB)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULOSB, do_vx_helper, gen_helper_VMULOSB)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULEUB, do_vx_helper, gen_helper_VMULEUB)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULOUB, do_vx_helper, gen_helper_VMULOUB)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULESH, do_vx_helper, gen_helper_VMULESH)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULOSH, do_vx_helper, gen_helper_VMULOSH)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULEUH, do_vx_helper, gen_helper_VMULEUH)
|
|
|
|
|
TRANS_FLAGS(ALTIVEC, VMULOUH, do_vx_helper, gen_helper_VMULOUH)
|
target/ppc: moved vector even and odd multiplication to decodetree
Moved the instructions vmulesb, vmulosb, vmuleub, vmuloub,
vmulesh, vmulosh, vmuleuh, vmulouh, vmulesw, vmulosw,
vmuleuw and vmulouw from legacy to decodetree. Implemented
the instructions vmulesd, vmulosd, vmuleud, vmuloud.
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Signed-off-by: Matheus Ferst <matheus.ferst@eldorado.org.br>
Message-Id: <20220225210936.1749575-3-matheus.ferst@eldorado.org.br>
Signed-off-by: Cédric Le Goater <clg@kaod.org>
2022-03-02 08:51:36 +03:00
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VMULESW, do_vx_helper, gen_helper_VMULESW)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VMULOSW, do_vx_helper, gen_helper_VMULOSW)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VMULEUW, do_vx_helper, gen_helper_VMULEUW)
|
|
|
|
|
TRANS_FLAGS2(ALTIVEC_207, VMULOUW, do_vx_helper, gen_helper_VMULOUW)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMULESD, do_vx_vmuleo, true , tcg_gen_muls2_i64)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMULOSD, do_vx_vmuleo, false, tcg_gen_muls2_i64)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMULEUD, do_vx_vmuleo, true , tcg_gen_mulu2_i64)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMULOUD, do_vx_vmuleo, false, tcg_gen_mulu2_i64)
|
|
|
|
|
|
2022-03-02 08:51:36 +03:00
|
|
|
|
/*
 * Multiply-high on the two 32-bit words packed in one 64-bit value.
 *
 * @t:    destination; bits 63..32 receive the upper half of the product of
 *        the high words, bits 31..0 the upper half of the product of the
 *        low words
 * @a:    first source doubleword (only read)
 * @b:    second source doubleword (only read)
 * @sign: true for signed operands, false for unsigned
 */
static void do_vx_vmulhw_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
{
    TCGv_i64 hi_prod = tcg_temp_new_i64();
    TCGv_i64 lo_prod = tcg_temp_new_i64();
    TCGv_i64 operand = tcg_temp_new_i64();

    /* Low words: widen each to 64 bits, then form the full product. */
    if (!sign) {
        tcg_gen_ext32u_i64(lo_prod, a);
        tcg_gen_ext32u_i64(operand, b);
    } else {
        tcg_gen_ext32s_i64(lo_prod, a);
        tcg_gen_ext32s_i64(operand, b);
    }
    tcg_gen_mul_i64(lo_prod, lo_prod, operand);

    /* High words: shift them down with the matching kind of extension. */
    if (!sign) {
        tcg_gen_shri_i64(hi_prod, a, 32);
        tcg_gen_shri_i64(operand, b, 32);
    } else {
        tcg_gen_sari_i64(hi_prod, a, 32);
        tcg_gen_sari_i64(operand, b, 32);
    }
    tcg_gen_mul_i64(hi_prod, hi_prod, operand);

    /*
     * Only the top 32 bits of each product are kept: bring the low-word
     * one down and drop it into the bottom half of the high-word product.
     */
    tcg_gen_shri_i64(lo_prod, lo_prod, 32);
    tcg_gen_deposit_i64(t, hi_prod, lo_prod, 0, 32);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Multiply-high on a 64-bit element.
 *
 * @t:    destination; receives the high 64 bits of a * b
 * @a:    first source doubleword
 * @b:    second source doubleword
 * @sign: true selects the signed double-width multiply, false the unsigned
 */
static void do_vx_vmulhd_i64(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b, bool sign)
{
    void (*mul2)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64);
    TCGv_i64 unused_lo;

    /* The low half of the product has to land somewhere; it is not used. */
    unused_lo = tcg_temp_new_i64();

    mul2 = sign ? tcg_gen_muls2_i64 : tcg_gen_mulu2_i64;
    mul2(unused_lo, t, a, b);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Common translator for the vector multiply-high instructions.
 *
 * @ctx:  translation context
 * @a:    decoded VX-form operands (vrt, vra, vrb register numbers)
 * @sign: forwarded to @func; true for the signed variants
 * @func: generator that computes one 64-bit chunk of the result.  It must
 *        fully overwrite its first argument: the previous contents of the
 *        destination register are not loaded for it.
 *
 * Requires the ISA310 flag and an enabled vector facility (via the
 * REQUIRE_* macros).  Returns true once the ops have been emitted.
 */
static bool do_vx_mulh(DisasContext *ctx, arg_VX *a, bool sign,
                       void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, bool))
{
    TCGv_i64 vra, vrb, vrt;
    int i;

    REQUIRE_INSNS_FLAGS2(ctx, ISA310);
    REQUIRE_VECTOR(ctx);

    vra = tcg_temp_new_i64();
    vrb = tcg_temp_new_i64();
    vrt = tcg_temp_new_i64();

    /* One pass per 64-bit half of the 128-bit vector register. */
    for (i = 0; i < 2; i++) {
        get_avr64(vra, a->vra, i);
        get_avr64(vrb, a->vrb, i);

        /* vrt is write-only here, so the old destination is never read. */
        func(vrt, vra, vrb, sign);

        set_avr64(a->vrt, vrt, i);
    }
    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS(VMULHSW, do_vx_mulh, true , do_vx_vmulhw_i64)
|
|
|
|
|
TRANS(VMULHSD, do_vx_mulh, true , do_vx_vmulhd_i64)
|
|
|
|
|
TRANS(VMULHUW, do_vx_mulh, false, do_vx_vmulhw_i64)
|
|
|
|
|
TRANS(VMULHUD, do_vx_mulh, false, do_vx_vmulhd_i64)
|
2022-03-02 08:51:36 +03:00
|
|
|
|
|
target/ppc: Move VAVG[SU][BHW] to decodetree and use gvec
Moved the instructions VAVGUB, VAVGUH, VAVGUW, VAVGSB, VAVGSH, VAVGSW,
to decodetree and use gvec with them. For these instructions the right
shift has to be done before the sum to avoid an overflow, so 1 is added at
the end if either operand has its LSB set, replicating the "+ 1" that the
ISA describes before the shift.
vavgub:
rept loop master patch
8 12500 0,02616600 0,00754200 (-71.2%)
25 4000 0,02530000 0,00637700 (-74.8%)
100 1000 0,02604600 0,00790100 (-69.7%)
500 200 0,03189300 0,01838400 (-42.4%)
2500 40 0,06006900 0,06851000 (+14.1%)
8000 12 0,13941000 0,20548500 (+47.4%)
vavguh:
rept loop master patch
8 12500 0,01818200 0,00780600 (-57.1%)
25 4000 0,01789300 0,00641600 (-64.1%)
100 1000 0,01899100 0,00787200 (-58.5%)
500 200 0,02527200 0,01828400 (-27.7%)
2500 40 0,05361800 0,06773000 (+26.3%)
8000 12 0,12886600 0,20291400 (+57.5%)
vavguw:
rept loop master patch
8 12500 0,01423100 0,00776600 (-45.4%)
25 4000 0,01780800 0,00638600 (-64.1%)
100 1000 0,02085500 0,00787000 (-62.3%)
500 200 0,02737100 0,01828800 (-33.2%)
2500 40 0,05572600 0,06774200 (+21.6%)
8000 12 0,13101700 0,20311600 (+55.0%)
vavgsb:
rept loop master patch
8 12500 0,03006000 0,00788600 (-73.8%)
25 4000 0,02882200 0,00637800 (-77.9%)
100 1000 0,02958000 0,00791400 (-73.2%)
500 200 0,03548800 0,01860400 (-47.6%)
2500 40 0,06360000 0,06850800 (+7.7%)
8000 12 0,13816500 0,20550300 (+48.7%)
vavgsh:
rept loop master patch
8 12500 0,01965900 0,00776600 (-60.5%)
25 4000 0,01875400 0,00638700 (-65.9%)
100 1000 0,01952200 0,00786900 (-59.7%)
500 200 0,02562000 0,01760300 (-31.3%)
2500 40 0,05384300 0,06742800 (+25.2%)
8000 12 0,13240800 0,20330000 (+53.5%)
vavgsw:
rept loop master patch
8 12500 0,01407700 0,00775600 (-44.9%)
25 4000 0,01762300 0,00640000 (-63.7%)
100 1000 0,02046500 0,00788500 (-61.5%)
500 200 0,02745600 0,01843000 (-32.9%)
2500 40 0,05375500 0,06820500 (+26.9%)
8000 12 0,13068300 0,20304900 (+55.4%)
These results seem to indicate that with gvec translation is slower but
execution is faster.
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-7-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:34 +03:00
|
|
|
|
static void do_vavg(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b,
|
|
|
|
|
void (*gen_shr_vec)(unsigned, TCGv_vec, TCGv_vec, int64_t))
|
|
|
|
|
{
|
|
|
|
|
TCGv_vec tmp = tcg_temp_new_vec_matching(t);
|
|
|
|
|
tcg_gen_or_vec(vece, tmp, a, b);
|
|
|
|
|
tcg_gen_and_vec(vece, tmp, tmp, tcg_constant_vec_matching(t, vece, 1));
|
|
|
|
|
gen_shr_vec(vece, a, a, 1);
|
|
|
|
|
gen_shr_vec(vece, b, b, 1);
|
|
|
|
|
tcg_gen_add_vec(vece, t, a, b);
|
|
|
|
|
tcg_gen_add_vec(vece, t, t, tmp);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QEMU_FLATTEN
|
|
|
|
|
static void gen_vavgu(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
|
|
|
|
|
{
|
|
|
|
|
do_vavg(vece, t, a, b, tcg_gen_shri_vec);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
QEMU_FLATTEN
|
|
|
|
|
static void gen_vavgs(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
|
|
|
|
|
{
|
|
|
|
|
do_vavg(vece, t, a, b, tcg_gen_sari_vec);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * Translate the vector average instructions with a gvec expansion.
 *
 * @ctx:  translation context
 * @a:    decoded VX-form operands
 * @sign: 0 for the unsigned variants, 1 for the signed ones (first table
 *        index)
 * @vece: element size, MO_8 / MO_16 / MO_32 (second table index)
 *
 * Returns true once the expansion has been emitted; REQUIRE_VECTOR handles
 * the case where the vector facility is unavailable.
 */
static bool do_vx_vavg(DisasContext *ctx, arg_VX *a, int sign, int vece)
{
    /* Vector opcodes that the inline expansions depend on. */
    static const TCGOpcode vecop_list_s[] = {
        INDEX_op_add_vec, INDEX_op_sari_vec, 0
    };
    static const TCGOpcode vecop_list_u[] = {
        INDEX_op_add_vec, INDEX_op_shri_vec, 0
    };

    static const GVecGen3 op[2][3] = {
        /* unsigned */
        [0] = {
            [MO_8] = {
                .vece = MO_8,
                .opt_opc = vecop_list_u,
                .fniv = gen_vavgu,
                .fno = gen_helper_VAVGUB,
            },
            [MO_16] = {
                .vece = MO_16,
                .opt_opc = vecop_list_u,
                .fniv = gen_vavgu,
                .fno = gen_helper_VAVGUH,
            },
            [MO_32] = {
                .vece = MO_32,
                .opt_opc = vecop_list_u,
                .fniv = gen_vavgu,
                .fno = gen_helper_VAVGUW,
            },
        },
        /* signed */
        [1] = {
            [MO_8] = {
                .vece = MO_8,
                .opt_opc = vecop_list_s,
                .fniv = gen_vavgs,
                .fno = gen_helper_VAVGSB,
            },
            [MO_16] = {
                .vece = MO_16,
                .opt_opc = vecop_list_s,
                .fniv = gen_vavgs,
                .fno = gen_helper_VAVGSH,
            },
            [MO_32] = {
                .vece = MO_32,
                .opt_opc = vecop_list_s,
                .fniv = gen_vavgs,
                .fno = gen_helper_VAVGSW,
            },
        },
    };

    REQUIRE_VECTOR(ctx);

    /* Operate on the full 16-byte register: oprsz = maxsz = 16. */
    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
                   avr_full_offset(a->vrb), 16, 16, &op[sign][vece]);

    return true;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Vector Average: the two trailing arguments are do_vx_vavg's sign
 * selector (1 = signed, 0 = unsigned) and the element size.
 */

/* signed */
TRANS_FLAGS(ALTIVEC, VAVGSB, do_vx_vavg, 1, MO_8)
TRANS_FLAGS(ALTIVEC, VAVGSH, do_vx_vavg, 1, MO_16)
TRANS_FLAGS(ALTIVEC, VAVGSW, do_vx_vavg, 1, MO_32)

/* unsigned */
TRANS_FLAGS(ALTIVEC, VAVGUB, do_vx_vavg, 0, MO_8)
TRANS_FLAGS(ALTIVEC, VAVGUH, do_vx_vavg, 0, MO_16)
TRANS_FLAGS(ALTIVEC, VAVGUW, do_vx_vavg, 0, MO_32)
|
|
|
|
|
|
target/ppc: Move VABSDU[BHW] to decodetree and use gvec
Moved VABSDUB, VABSDUH and VABSDUW to decodetree and use gvec to
translate them.
vabsdub:
rept loop master patch
8 12500 0,03601600 0,00688500 (-80.9%)
25 4000 0,03651000 0,00532100 (-85.4%)
100 1000 0,03666900 0,00595300 (-83.8%)
500 200 0,04305800 0,01244600 (-71.1%)
2500 40 0,06893300 0,04273700 (-38.0%)
8000 12 0,14633200 0,12660300 (-13.5%)
vabsduh:
rept loop master patch
8 12500 0,02172400 0,00687500 (-68.4%)
25 4000 0,02154100 0,00531500 (-75.3%)
100 1000 0,02235400 0,00596300 (-73.3%)
500 200 0,02827500 0,01245100 (-56.0%)
2500 40 0,05638400 0,04285500 (-24.0%)
8000 12 0,13166000 0,12641400 (-4.0%)
vabsduw:
rept loop master patch
8 12500 0,01646400 0,00688300 (-58.2%)
25 4000 0,01454500 0,00475500 (-67.3%)
100 1000 0,01545800 0,00511800 (-66.9%)
500 200 0,02168200 0,01114300 (-48.6%)
2500 40 0,04571300 0,04138800 (-9.5%)
8000 12 0,12209500 0,12178500 (-0.3%)
Same as VADDCUW and VSUBCUW, overall performance gain but it uses more
TCGop (4 before the patch, 6 after).
Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-8-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
2022-10-19 15:50:35 +03:00
|
|
|
|
/*
 * Unsigned absolute difference per element: t = umax(a, b) - umin(a, b).
 *
 * @vece: element size
 * @t:    destination vector
 * @a:    first source; overwritten with umin(a, b)
 * @b:    second source (only read)
 */
static void gen_vabsdu(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
{
    /* larger operand goes to t, smaller one replaces a */
    tcg_gen_umax_vec(vece, t, a, b);
    tcg_gen_umin_vec(vece, a, a, b);

    /* larger - smaller never wraps for unsigned elements */
    tcg_gen_sub_vec(vece, t, t, a);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Translate the vector absolute-difference-unsigned instructions with a
 * gvec expansion.
 *
 * @ctx:  translation context
 * @a:    decoded VX-form operands
 * @vece: element size, MO_8 / MO_16 / MO_32; also the index into the
 *        expander table
 *
 * Returns true once the expansion has been emitted.
 */
static bool do_vabsdu(DisasContext *ctx, arg_VX *a, const int vece)
{
    /* Vector opcodes that gen_vabsdu depends on. */
    static const TCGOpcode vecop_list[] = {
        INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
    };

    static const GVecGen3 op[] = {
        [MO_8] = {
            .vece = MO_8,
            .opt_opc = vecop_list,
            .fniv = gen_vabsdu,
            .fno = gen_helper_VABSDUB,
        },
        [MO_16] = {
            .vece = MO_16,
            .opt_opc = vecop_list,
            .fniv = gen_vabsdu,
            .fno = gen_helper_VABSDUH,
        },
        [MO_32] = {
            .vece = MO_32,
            .opt_opc = vecop_list,
            .fniv = gen_vabsdu,
            .fno = gen_helper_VABSDUW,
        },
    };

    REQUIRE_VECTOR(ctx);

    /* Operate on the full 16-byte register: oprsz = maxsz = 16. */
    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
                   avr_full_offset(a->vrb), 16, 16, &op[vece]);

    return true;
}
|
|
|
|
|
|
|
|
|
|
TRANS_FLAGS2(ISA300, VABSDUB, do_vabsdu, MO_8)
|
|
|
|
|
TRANS_FLAGS2(ISA300, VABSDUH, do_vabsdu, MO_16)
|
|
|
|
|
TRANS_FLAGS2(ISA300, VABSDUW, do_vabsdu, MO_32)
|
|
|
|
|
|
2022-05-25 16:49:47 +03:00
|
|
|
|
static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece,
|
|
|
|
|
void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b),
|
|
|
|
|
void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b))
|
|
|
|
|
{
|
|
|
|
|
const GVecGen3 op = {
|
|
|
|
|
.fni4 = func_32,
|
|
|
|
|
.fni8 = func_64,
|
|
|
|
|
.vece = vece
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
REQUIRE_VECTOR(ctx);
|
|
|
|
|
|
|
|
|
|
tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
|
|
|
|
|
avr_full_offset(a->vrb), 16, 16, &op);
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * DIVU32(NAME, DIV): define NAME(t, a, b), a 32-bit unsigned generator that
 * replaces a zero divisor by 1 (overwriting b) and then emits DIV(t, a, b),
 * so DIV never sees a division by zero.
 */
#define DIVU32(NAME, DIV)                                               \
static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)                    \
{                                                                       \
    TCGv_i32 safe_divisor = tcg_constant_i32(1);                        \
    TCGv_i32 no_divisor = tcg_constant_i32(0);                          \
                                                                        \
    /* b = (b == 0) ? 1 : b */                                          \
    tcg_gen_movcond_i32(TCG_COND_EQ, b, b, no_divisor, safe_divisor, b);\
    DIV(t, a, b);                                                       \
}
|
|
|
|
|
|
|
|
|
|
/*
 * DIVS32(NAME, DIV): define NAME(t, a, b), a 32-bit signed generator that
 * guards DIV against the two problematic operand pairs:
 *   - b == 0
 *   - a == INT32_MIN && b == -1 (quotient not representable)
 * In either case b is overwritten with 1 before DIV(t, a, b) is emitted.
 */
#define DIVS32(NAME, DIV)                                               \
static void NAME(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)                    \
{                                                                       \
    TCGv_i32 t0 = tcg_temp_new_i32();                                   \
    TCGv_i32 t1 = tcg_temp_new_i32();                                   \
    TCGv_i32 zero = tcg_constant_i32(0);                                \
                                                                        \
    /* t0 = (a == INT32_MIN && b == -1) || b == 0 */                    \
    tcg_gen_setcondi_i32(TCG_COND_EQ, t0, a, INT32_MIN);                \
    tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, -1);                       \
    tcg_gen_and_i32(t0, t0, t1);                                        \
    tcg_gen_setcondi_i32(TCG_COND_EQ, t1, b, 0);                        \
    tcg_gen_or_i32(t0, t0, t1);                                         \
                                                                        \
    /* b = t0 ? t0 : b; t0 is 1 when set, so the divisor becomes 1 */   \
    tcg_gen_movcond_i32(TCG_COND_NE, b, t0, zero, t0, b);               \
    DIV(t, a, b);                                                       \
}
|
|
|
|
|
|
|
|
|
|
/*
 * DIVU64(NAME, DIV): define NAME(t, a, b), a 64-bit unsigned generator that
 * replaces a zero divisor by 1 (overwriting b) and then emits DIV(t, a, b),
 * so DIV never sees a division by zero.
 */
#define DIVU64(NAME, DIV)                                               \
static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)                    \
{                                                                       \
    TCGv_i64 safe_divisor = tcg_constant_i64(1);                        \
    TCGv_i64 no_divisor = tcg_constant_i64(0);                          \
                                                                        \
    /* b = (b == 0) ? 1 : b */                                          \
    tcg_gen_movcond_i64(TCG_COND_EQ, b, b, no_divisor, safe_divisor, b);\
    DIV(t, a, b);                                                       \
}
|
|
|
|
|
|
|
|
|
|
/*
 * DIVS64(NAME, DIV): define NAME(t, a, b), a 64-bit signed generator that
 * guards DIV against the two problematic operand pairs:
 *   - b == 0
 *   - a == INT64_MIN && b == -1 (quotient not representable)
 * In either case b is overwritten with 1 before DIV(t, a, b) is emitted.
 */
#define DIVS64(NAME, DIV)                                               \
static void NAME(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)                    \
{                                                                       \
    TCGv_i64 t0 = tcg_temp_new_i64();                                   \
    TCGv_i64 t1 = tcg_temp_new_i64();                                   \
    TCGv_i64 zero = tcg_constant_i64(0);                                \
                                                                        \
    /* t0 = (a == INT64_MIN && b == -1) || b == 0 */                    \
    tcg_gen_setcondi_i64(TCG_COND_EQ, t0, a, INT64_MIN);                \
    tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, -1);                       \
    tcg_gen_and_i64(t0, t0, t1);                                        \
    tcg_gen_setcondi_i64(TCG_COND_EQ, t1, b, 0);                        \
    tcg_gen_or_i64(t0, t0, t1);                                         \
                                                                        \
    /* b = t0 ? t0 : b; t0 is 1 when set, so the divisor becomes 1 */   \
    tcg_gen_movcond_i64(TCG_COND_NE, b, t0, zero, t0, b);               \
    DIV(t, a, b);                                                       \
}
|
|
|
|
|
|
|
|
|
|
DIVS32(do_divsw, tcg_gen_div_i32)
|
|
|
|
|
DIVU32(do_divuw, tcg_gen_divu_i32)
|
|
|
|
|
DIVS64(do_divsd, tcg_gen_div_i64)
|
|
|
|
|
DIVU64(do_divud, tcg_gen_divu_i64)
|
|
|
|
|
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVSW, do_vdiv_vmod, MO_32, do_divsw, NULL)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVUW, do_vdiv_vmod, MO_32, do_divuw, NULL)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVSD, do_vdiv_vmod, MO_64, NULL, do_divsd)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVUD, do_vdiv_vmod, MO_64, NULL, do_divud)
|
2022-05-25 16:49:48 +03:00
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVSQ, do_vx_helper, gen_helper_VDIVSQ)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVUQ, do_vx_helper, gen_helper_VDIVUQ)
|
2022-05-25 16:49:47 +03:00
|
|
|
|
|
2022-05-25 16:49:49 +03:00
|
|
|
|
/*
 * Signed divide-extended on 32-bit operands: t = low 32 bits of
 * ((int64_t)a << 32) / b, computed with a 64-bit signed division.
 *
 * No operand checks are made here; in this file the function is only used
 * as the DIV argument of DIVS32, which adjusts b beforehand.
 */
static void do_dives_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i64 dividend = tcg_temp_new_i64();
    TCGv_i64 divisor = tcg_temp_new_i64();

    /* sign-extend both operands to 64 bits */
    tcg_gen_ext_i32_i64(dividend, a);
    tcg_gen_ext_i32_i64(divisor, b);

    /* (a << 32)/b */
    tcg_gen_shli_i64(dividend, dividend, 32);
    tcg_gen_div_i64(dividend, dividend, divisor);

    /* if quotient doesn't fit in 32 bits the result is undefined */
    tcg_gen_extrl_i64_i32(t, dividend);
}
|
|
|
|
|
|
|
|
|
|
/*
 * Unsigned divide-extended on 32-bit operands: t = low 32 bits of
 * ((uint64_t)a << 32) / b, computed with a 64-bit unsigned division.
 *
 * No operand checks are made here; in this file the function is only used
 * as the DIV argument of DIVU32, which adjusts b beforehand.
 */
static void do_diveu_i32(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i64 dividend = tcg_temp_new_i64();
    TCGv_i64 divisor = tcg_temp_new_i64();

    /* zero-extend both operands to 64 bits */
    tcg_gen_extu_i32_i64(dividend, a);
    tcg_gen_extu_i32_i64(divisor, b);

    /* (a << 32)/b */
    tcg_gen_shli_i64(dividend, dividend, 32);
    tcg_gen_divu_i64(dividend, dividend, divisor);

    /* if quotient doesn't fit in 32 bits the result is undefined */
    tcg_gen_extrl_i64_i32(t, dividend);
}
|
|
|
|
|
|
|
|
|
|
DIVS32(do_divesw, do_dives_i32)
|
|
|
|
|
DIVU32(do_diveuw, do_diveu_i32)
|
|
|
|
|
|
2022-05-25 16:49:53 +03:00
|
|
|
|
DIVS32(do_modsw, tcg_gen_rem_i32)
|
|
|
|
|
DIVU32(do_moduw, tcg_gen_remu_i32)
|
|
|
|
|
DIVS64(do_modsd, tcg_gen_rem_i64)
|
|
|
|
|
DIVU64(do_modud, tcg_gen_remu_i64)
|
|
|
|
|
|
2022-05-25 16:49:49 +03:00
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVESW, do_vdiv_vmod, MO_32, do_divesw, NULL)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVEUW, do_vdiv_vmod, MO_32, do_diveuw, NULL)
|
2022-05-25 16:49:52 +03:00
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVESD, do_vx_helper, gen_helper_VDIVESD)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVEUD, do_vx_helper, gen_helper_VDIVEUD)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVESQ, do_vx_helper, gen_helper_VDIVESQ)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VDIVEUQ, do_vx_helper, gen_helper_VDIVEUQ)
|
2022-05-25 16:49:49 +03:00
|
|
|
|
|
2022-05-25 16:49:53 +03:00
|
|
|
|
TRANS_FLAGS2(ISA310, VMODSW, do_vdiv_vmod, MO_32, do_modsw , NULL)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMODUW, do_vdiv_vmod, MO_32, do_moduw, NULL)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMODSD, do_vdiv_vmod, MO_64, NULL, do_modsd)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMODUD, do_vdiv_vmod, MO_64, NULL, do_modud)
|
2022-05-25 16:49:54 +03:00
|
|
|
|
TRANS_FLAGS2(ISA310, VMODSQ, do_vx_helper, gen_helper_VMODSQ)
|
|
|
|
|
TRANS_FLAGS2(ISA310, VMODUQ, do_vx_helper, gen_helper_VMODUQ)
|
2022-05-25 16:49:53 +03:00
|
|
|
|
|
2022-05-25 16:49:47 +03:00
|
|
|
|
/* The divide/modulo generator macros are not needed past this point. */
#undef DIVS32
#undef DIVU32
#undef DIVS64
#undef DIVU64
|
|
|
|
|
|
2016-07-27 09:56:23 +03:00
|
|
|
|
/*
 * Retire the generator macros used by this file (GEN_VR_LDX is defined at
 * its top; the others are presumably defined further up as well).
 */

/* vector load/store */
#undef GEN_VR_LDX
#undef GEN_VR_STX
#undef GEN_VR_LVE
#undef GEN_VR_STVE

/* VX / VXR / VA forms */
#undef GEN_VX_LOGICAL
#undef GEN_VX_LOGICAL_207
#undef GEN_VXFORM
#undef GEN_VXFORM_207
#undef GEN_VXFORM_DUAL
#undef GEN_VXRFORM_DUAL
#undef GEN_VXRFORM1
#undef GEN_VXRFORM
#undef GEN_VXFORM_VSPLTI
#undef GEN_VXFORM_NOA
#undef GEN_VXFORM_UIMM
#undef GEN_VAFORM_PAIRED

#undef GEN_BCD2
|