target-tilegx: Implement complex multiply instructions
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
parent
0b4232f108
commit
9ff5b57c21
@ -97,3 +97,43 @@ uint64_t helper_crc32_32(uint64_t accum, uint64_t input)
|
|||||||
/* zlib crc32 converts the accumulator and output to one's complement. */
|
/* zlib crc32 converts the accumulator and output to one's complement. */
|
||||||
return crc32(accum ^ 0xffffffff, buf, 4) ^ 0xffffffff;
|
return crc32(accum ^ 0xffffffff, buf, 4) ^ 0xffffffff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Complex multiply-accumulate.
 *
 * srca and srcb each carry a complex number as two signed 16-bit halves:
 * the real part in bits 0..15 and the imaginary part in bits 16..31.
 * srcd carries the accumulator as two 32-bit halves: real in bits 0..31,
 * imaginary in bits 32..63.
 *
 * Returns srcd + srca * srcb, with each 32-bit component computed modulo
 * 2^32, packed via deposit64() with the imaginary component placed in the
 * 32-bit field starting at bit 32.
 */
uint64_t helper_cmula(uint64_t srcd, uint64_t srca, uint64_t srcb)
{
    /*
     * Sign-extend each 16-bit half, then keep it in an unsigned 32-bit
     * variable so that the products and sums below wrap instead of
     * overflowing a signed type.
     */
    const uint32_t a_re = (int16_t)srca;
    const uint32_t a_im = (int16_t)(srca >> 16);
    const uint32_t b_re = (int16_t)srcb;
    const uint32_t b_im = (int16_t)(srcb >> 16);

    /* Start from the incoming accumulator halves. */
    uint32_t acc_re = srcd;
    uint32_t acc_im = srcd >> 32;

    /* (a_re + i*a_im) * (b_re + i*b_im), accumulated component-wise. */
    acc_re += a_re * b_re - a_im * b_im;
    acc_im += a_re * b_im + a_im * b_re;

    return deposit64(acc_re, 32, 32, acc_im);
}
|
||||||
|
|
||||||
|
/*
 * Complex multiply-accumulate with a 15-bit down-shift of the product.
 *
 * srca, srcb and srcd each carry a complex number as two signed 16-bit
 * halves: real part in bits 0..15, imaginary part in bits 16..31.
 *
 * Each component of srca * srcb is formed in 32 bits, shifted right by 15
 * as a signed value, and added to the matching sign-extended component of
 * srcd.  The two sums are packed with deposit32(), which places the
 * imaginary sum in the 16-bit field starting at bit 16.
 */
uint64_t helper_cmulaf(uint64_t srcd, uint64_t srca, uint64_t srcb)
{
    /* Sign-extended 16-bit inputs, held unsigned so products wrap. */
    const uint32_t a_re = (int16_t)srca;
    const uint32_t a_im = (int16_t)(srca >> 16);
    const uint32_t b_re = (int16_t)srcb;
    const uint32_t b_im = (int16_t)(srcb >> 16);
    const uint32_t d_re = (int16_t)srcd;
    const uint32_t d_im = (int16_t)(srcd >> 16);

    /* The products are reinterpreted as signed so the shift below is signed. */
    const int32_t prod_re = a_re * b_re - a_im * b_im;
    const int32_t prod_im = a_re * b_im + a_im * b_re;

    /* Scale the product down, then accumulate (unsigned, wrapping). */
    const uint32_t sum_re = (prod_re >> 15) + d_re;
    const uint32_t sum_im = (prod_im >> 15) + d_im;

    return deposit32(sum_re, 16, 16, sum_im);
}
|
||||||
|
|
||||||
|
/*
 * Complex multiply with a caller-selected rounding constant and shift.
 *
 * srca and srcb each carry a complex number as two signed 16-bit halves:
 * real part in bits 0..15, imaginary part in bits 16..31.
 *
 * `round` is added to both the real and the imaginary 32-bit product, and
 * each result is then shifted right by `shift` as a signed value.  The two
 * shifted values are packed with deposit32(), which places the imaginary
 * one in the 16-bit field starting at bit 16.
 */
uint64_t helper_cmul2(uint64_t srca, uint64_t srcb, int shift, int round)
{
    /* Sign-extended 16-bit inputs, held unsigned so the arithmetic wraps. */
    const uint32_t a_re = (int16_t)srca;
    const uint32_t a_im = (int16_t)(srca >> 16);
    const uint32_t b_re = (int16_t)srcb;
    const uint32_t b_im = (int16_t)(srcb >> 16);

    /* Product plus rounding term, reinterpreted as signed for the shift. */
    const int32_t res_re = a_re * b_re - a_im * b_im + round;
    const int32_t res_im = a_re * b_im + a_im * b_re + round;

    const int32_t out_re = res_re >> shift;
    const int32_t out_im = res_im >> shift;

    return deposit32(out_re, 16, 16, out_im);
}
|
||||||
|
@ -6,6 +6,9 @@ DEF_HELPER_FLAGS_1(revbits, TCG_CALL_NO_RWG_SE, i64, i64)
|
|||||||
DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
|
DEF_HELPER_FLAGS_3(shufflebytes, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
|
||||||
DEF_HELPER_FLAGS_2(crc32_8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
DEF_HELPER_FLAGS_2(crc32_8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||||
DEF_HELPER_FLAGS_2(crc32_32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
DEF_HELPER_FLAGS_2(crc32_32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||||
|
/*
 * Helper declarations for the complex-multiply instructions.
 * cmula and cmulaf take three i64 operands (accumulator, srca, srcb);
 * cmul2 takes two i64 operands plus two int arguments (shift, round).
 * All three produce an i64 result.
 */
DEF_HELPER_FLAGS_3(cmula, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
|
||||||
|
DEF_HELPER_FLAGS_3(cmulaf, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
|
||||||
|
DEF_HELPER_FLAGS_4(cmul2, TCG_CALL_NO_RWG_SE, i64, i64, i64, int, int)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_2(v1multu, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
DEF_HELPER_FLAGS_2(v1multu, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||||
DEF_HELPER_FLAGS_2(v1shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
DEF_HELPER_FLAGS_2(v1shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
|
||||||
|
@ -276,6 +276,15 @@ static void gen_mul_half(TCGv tdest, TCGv tsrca, TCGv tsrcb,
|
|||||||
tcg_temp_free(t);
|
tcg_temp_free(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Emit a call to the cmul2 helper.
 *
 * The helper takes its shift count and rounding constant as 32-bit TCG
 * values, so the translate-time integers `sh` and `rd` are wrapped in
 * constant temporaries for the call and released afterwards.
 */
static void gen_cmul2(TCGv tdest, TCGv tsrca, TCGv tsrcb, int sh, int rd)
{
    TCGv_i32 shift_tmp = tcg_const_i32(sh);
    TCGv_i32 round_tmp = tcg_const_i32(rd);

    gen_helper_cmul2(tdest, tsrca, tsrcb, shift_tmp, round_tmp);

    /* The temporaries are only needed for the helper call itself. */
    tcg_temp_free_i32(shift_tmp);
    tcg_temp_free_i32(round_tmp);
}
|
||||||
|
|
||||||
static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
|
static TileExcp gen_st_opcode(DisasContext *dc, unsigned dest, unsigned srca,
|
||||||
unsigned srcb, TCGMemOp memop, const char *name)
|
unsigned srcb, TCGMemOp memop, const char *name)
|
||||||
{
|
{
|
||||||
@ -759,13 +768,33 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
|
|||||||
mnemonic = "cmpne";
|
mnemonic = "cmpne";
|
||||||
break;
|
break;
|
||||||
case OE_RRR(CMULAF, 0, X0):
|
case OE_RRR(CMULAF, 0, X0):
|
||||||
|
gen_helper_cmulaf(tdest, load_gr(dc, dest), tsrca, tsrcb);
|
||||||
|
mnemonic = "cmulaf";
|
||||||
|
break;
|
||||||
case OE_RRR(CMULA, 0, X0):
|
case OE_RRR(CMULA, 0, X0):
|
||||||
|
gen_helper_cmula(tdest, load_gr(dc, dest), tsrca, tsrcb);
|
||||||
|
mnemonic = "cmula";
|
||||||
|
break;
|
||||||
case OE_RRR(CMULFR, 0, X0):
|
case OE_RRR(CMULFR, 0, X0):
|
||||||
|
gen_cmul2(tdest, tsrca, tsrcb, 15, 1 << 14);
|
||||||
|
mnemonic = "cmulfr";
|
||||||
|
break;
|
||||||
case OE_RRR(CMULF, 0, X0):
|
case OE_RRR(CMULF, 0, X0):
|
||||||
|
gen_cmul2(tdest, tsrca, tsrcb, 15, 0);
|
||||||
|
mnemonic = "cmulf";
|
||||||
|
break;
|
||||||
case OE_RRR(CMULHR, 0, X0):
|
case OE_RRR(CMULHR, 0, X0):
|
||||||
|
gen_cmul2(tdest, tsrca, tsrcb, 16, 1 << 15);
|
||||||
|
mnemonic = "cmulhr";
|
||||||
|
break;
|
||||||
case OE_RRR(CMULH, 0, X0):
|
case OE_RRR(CMULH, 0, X0):
|
||||||
|
gen_cmul2(tdest, tsrca, tsrcb, 16, 0);
|
||||||
|
mnemonic = "cmulh";
|
||||||
|
break;
|
||||||
case OE_RRR(CMUL, 0, X0):
|
case OE_RRR(CMUL, 0, X0):
|
||||||
return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
|
gen_helper_cmula(tdest, load_zero(dc), tsrca, tsrcb);
|
||||||
|
mnemonic = "cmul";
|
||||||
|
break;
|
||||||
case OE_RRR(CRC32_32, 0, X0):
|
case OE_RRR(CRC32_32, 0, X0):
|
||||||
gen_helper_crc32_32(tdest, tsrca, tsrcb);
|
gen_helper_crc32_32(tdest, tsrca, tsrcb);
|
||||||
mnemonic = "crc32_32";
|
mnemonic = "crc32_32";
|
||||||
|
Loading…
Reference in New Issue
Block a user