Hexagon (target/hexagon) Fix assignment to tmp registers
The order in which instructions are generated by gen_insn() influences assignment to tmp registers. During generation, tmp instructions (e.g. generate_V6_vassign_tmp) use vreg_src_off() to determine what kind of register to use as source. If some instruction (e.g. generate_V6_vmpyowh_64_acc) uses a tmp register but is generated prior to the corresponding tmp instruction, the vregs_updated_tmp bit map isn't updated in time. Example: { v14.tmp = v16; v25 = v14 } This works properly because generate_V6_vassign_tmp is generated before generate_V6_vassign and the bit map is updated. { v15:14.tmp = vcombine(v21, v16); v25:24 += vmpyo(v18.w,v14.h) } This does not work properly because vmpyo is generated before vcombine and therefore the bit map does not yet know that there's a tmp register. The parentheses in the decoding function were in the wrong place. Moving them to the correct location makes shuffling of .tmp vector registers work as expected. Signed-off-by: Marco Liebel <quic_mliebel@quicinc.com> Reviewed-by: Taylor Simpson <tsimpson@quicinc.com> Tested-by: Taylor Simpson <tsimpson@quicinc.com> Signed-off-by: Taylor Simpson <tsimpson@quicinc.com> Reviewed-by: Brian Cain <bcain@quicinc.com> Message-Id: <20230522174708.464197-1-quic_mliebel@quicinc.com>
This commit is contained in:
parent
0d57cd61d9
commit
3fd49e2217
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
* Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
|
||||||
*
|
*
|
||||||
* This program is free software; you can redistribute it and/or modify
|
* This program is free software; you can redistribute it and/or modify
|
||||||
* it under the terms of the GNU General Public License as published by
|
* it under the terms of the GNU General Public License as published by
|
||||||
@ -148,9 +148,9 @@ decode_shuffle_for_execution_vops(Packet *pkt)
|
|||||||
int i;
|
int i;
|
||||||
for (i = 0; i < pkt->num_insns; i++) {
|
for (i = 0; i < pkt->num_insns; i++) {
|
||||||
uint16_t opcode = pkt->insn[i].opcode;
|
uint16_t opcode = pkt->insn[i].opcode;
|
||||||
if (GET_ATTRIB(opcode, A_LOAD) &&
|
if ((GET_ATTRIB(opcode, A_LOAD) &&
|
||||||
(GET_ATTRIB(opcode, A_CVI_NEW) ||
|
GET_ATTRIB(opcode, A_CVI_NEW)) ||
|
||||||
GET_ATTRIB(opcode, A_CVI_TMP))) {
|
GET_ATTRIB(opcode, A_CVI_TMP)) {
|
||||||
/*
|
/*
|
||||||
* Find prior consuming vector instructions
|
* Find prior consuming vector instructions
|
||||||
* Move to end of packet
|
* Move to end of packet
|
||||||
|
@ -60,6 +60,36 @@ static void test_load_tmp(void)
|
|||||||
check_output_w(__LINE__, BUFSIZE);
|
check_output_w(__LINE__, BUFSIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void test_load_tmp2(void)
|
||||||
|
{
|
||||||
|
void *pout0 = &output[0];
|
||||||
|
void *pout1 = &output[1];
|
||||||
|
|
||||||
|
asm volatile(
|
||||||
|
"r0 = #0x03030303\n\t"
|
||||||
|
"v16 = vsplat(r0)\n\t"
|
||||||
|
"r0 = #0x04040404\n\t"
|
||||||
|
"v18 = vsplat(r0)\n\t"
|
||||||
|
"r0 = #0x05050505\n\t"
|
||||||
|
"v21 = vsplat(r0)\n\t"
|
||||||
|
"{\n\t"
|
||||||
|
" v25:24 += vmpyo(v18.w, v14.h)\n\t"
|
||||||
|
" v15:14.tmp = vcombine(v21, v16)\n\t"
|
||||||
|
"}\n\t"
|
||||||
|
"vmem(%0 + #0) = v24\n\t"
|
||||||
|
"vmem(%1 + #0) = v25\n\t"
|
||||||
|
: : "r"(pout0), "r"(pout1)
|
||||||
|
: "r0", "v16", "v18", "v21", "v24", "v25", "memory"
|
||||||
|
);
|
||||||
|
|
||||||
|
for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; ++i) {
|
||||||
|
expect[0].w[i] = 0x180c0000;
|
||||||
|
expect[1].w[i] = 0x000c1818;
|
||||||
|
}
|
||||||
|
|
||||||
|
check_output_w(__LINE__, 2);
|
||||||
|
}
|
||||||
|
|
||||||
static void test_load_cur(void)
|
static void test_load_cur(void)
|
||||||
{
|
{
|
||||||
void *p0 = buffer0;
|
void *p0 = buffer0;
|
||||||
@ -435,6 +465,7 @@ int main()
|
|||||||
init_buffers();
|
init_buffers();
|
||||||
|
|
||||||
test_load_tmp();
|
test_load_tmp();
|
||||||
|
test_load_tmp2();
|
||||||
test_load_cur();
|
test_load_cur();
|
||||||
test_load_aligned();
|
test_load_aligned();
|
||||||
test_load_unaligned();
|
test_load_unaligned();
|
||||||
|
Loading…
Reference in New Issue
Block a user