tcg/s390x: Implement ctpop operation
The POPCNT instruction has an older form that produces per-byte results, and a newer form that produces per-register results.

Reviewed-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
bfff851842
commit
29a5ea738a
@ -206,6 +206,7 @@ typedef enum S390Opcode {
|
||||
|
||||
RRFc_LOCR = 0xb9f2,
|
||||
RRFc_LOCGR = 0xb9e2,
|
||||
RRFc_POPCNT = 0xb9e1,
|
||||
|
||||
RR_AR = 0x1a,
|
||||
RR_ALR = 0x1e,
|
||||
@ -1435,6 +1436,32 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
|
||||
tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
|
||||
}
|
||||
|
||||
/*
 * Emit host code that leaves the population count of SRC in DEST.
 *
 * s    - code generation context the instructions are appended to
 * type - TCG_TYPE_I32 selects the 32-bit sequence; any other value
 *        takes the 64-bit sequence
 * dest - register receiving the count
 * src  - register holding the value whose set bits are counted
 *
 * POPCNT is emitted in one of two forms, chosen by whether the
 * MISC_INSN_EXT3 facility is available: one that yields the complete
 * count directly, and one that only yields a separate count per byte
 * and therefore needs a follow-up reduction.
 */
static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
        /*
         * With MIE3, setting bit 0 of the m4 field (the final operand,
         * 8) makes a single POPCNT produce the complete result.
         */
        if (type == TCG_TYPE_I32) {
            /*
             * Pass the source through tgen_ext32u into dest first and
             * count that instead.
             * NOTE(review): presumably a 32-bit zero-extension so that
             * stale high bits do not contribute to the count -- confirm
             * against tgen_ext32u.
             */
            tgen_ext32u(s, dest, src);
            src = dest;
        }
        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
    } else {
        /* Without MIE3, each byte of dest receives its own bit count. */
        tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);

        /*
         * Multiplying by a constant with 0x01 in every byte adds all
         * the per-byte counts together in the most significant byte;
         * the right shift then moves that byte down to the bottom.
         */
        if (type != TCG_TYPE_I32) {
            /* 64-bit: the multiplier is materialized in TCG_TMP0. */
            tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
            tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
            tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
        } else {
            /* 32-bit: the multiplier is passed as an immediate operand. */
            tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
            tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
        }
    }
}
|
||||
|
||||
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
|
||||
int ofs, int len, int z)
|
||||
{
|
||||
@ -2584,6 +2611,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tgen_clz(s, args[0], args[1], args[2], const_args[2]);
|
||||
break;
|
||||
|
||||
case INDEX_op_ctpop_i32:
|
||||
tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
|
||||
break;
|
||||
case INDEX_op_ctpop_i64:
|
||||
tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
|
||||
break;
|
||||
|
||||
case INDEX_op_mb:
|
||||
/* The host memory model is quite strong, we simply need to
|
||||
serialize the instruction stream. */
|
||||
@ -3146,6 +3180,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
|
||||
case INDEX_op_extu_i32_i64:
|
||||
case INDEX_op_extract_i32:
|
||||
case INDEX_op_extract_i64:
|
||||
case INDEX_op_ctpop_i32:
|
||||
case INDEX_op_ctpop_i64:
|
||||
return C_O1_I1(r, r);
|
||||
|
||||
case INDEX_op_qemu_ld_i32:
|
||||
|
@ -91,7 +91,7 @@ extern uint64_t s390_facilities[3];
|
||||
#define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3)
|
||||
#define TCG_TARGET_HAS_clz_i32 0
|
||||
#define TCG_TARGET_HAS_ctz_i32 0
|
||||
#define TCG_TARGET_HAS_ctpop_i32 0
|
||||
#define TCG_TARGET_HAS_ctpop_i32 1
|
||||
#define TCG_TARGET_HAS_deposit_i32 1
|
||||
#define TCG_TARGET_HAS_extract_i32 1
|
||||
#define TCG_TARGET_HAS_sextract_i32 0
|
||||
@ -128,7 +128,7 @@ extern uint64_t s390_facilities[3];
|
||||
#define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3)
|
||||
#define TCG_TARGET_HAS_clz_i64 1
|
||||
#define TCG_TARGET_HAS_ctz_i64 0
|
||||
#define TCG_TARGET_HAS_ctpop_i64 0
|
||||
#define TCG_TARGET_HAS_ctpop_i64 1
|
||||
#define TCG_TARGET_HAS_deposit_i64 1
|
||||
#define TCG_TARGET_HAS_extract_i64 1
|
||||
#define TCG_TARGET_HAS_sextract_i64 0
|
||||
|
Loading…
Reference in New Issue
Block a user