diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 8254f9f650..c0434fa2f8 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -206,6 +206,7 @@ typedef enum S390Opcode { RRFc_LOCR = 0xb9f2, RRFc_LOCGR = 0xb9e2, + RRFc_POPCNT = 0xb9e1, RR_AR = 0x1a, RR_ALR = 0x1e, @@ -1435,6 +1436,32 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1, tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2); } +static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src) +{ + /* With MIE3, and bit 0 of m4 set, we get the complete result. */ + if (HAVE_FACILITY(MISC_INSN_EXT3)) { + if (type == TCG_TYPE_I32) { + tgen_ext32u(s, dest, src); + src = dest; + } + tcg_out_insn(s, RRFc, POPCNT, dest, src, 8); + return; + } + + /* Without MIE3, each byte gets the count of bits for the byte. */ + tcg_out_insn(s, RRFc, POPCNT, dest, src, 0); + + /* Multiply to sum each byte at the top of the word. */ + if (type == TCG_TYPE_I32) { + tcg_out_insn(s, RIL, MSFI, dest, 0x01010101); + tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull); + tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0); + tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56); + } +} + static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src, int ofs, int len, int z) { @@ -2584,6 +2611,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tgen_clz(s, args[0], args[1], args[2], const_args[2]); break; + case INDEX_op_ctpop_i32: + tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]); + break; + case INDEX_op_ctpop_i64: + tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]); + break; + case INDEX_op_mb: /* The host memory model is quite strong, we simply need to serialize the instruction stream. */ @@ -3146,6 +3180,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_extu_i32_i64: case INDEX_op_extract_i32: case INDEX_op_extract_i64: + case INDEX_op_ctpop_i32: + case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_qemu_ld_i32: diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index dabdae1e84..68dcbc6645 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -91,7 +91,7 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_nor_i32 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 0 @@ -128,7 +128,7 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_nor_i64 HAVE_FACILITY(MISC_INSN_EXT3) #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0