qemu/tests/tcg/hexagon/hvx_misc.c
Taylor Simpson 1e814a0dc4 Hexagon (target/hexagon) make VyV operands use a unique temp
VyV operand is only used in the vshuff and vdeal instructions.  These
instructions write to both VyV and VxV operands.  In the case where
both operands are the same register, we need a separate location for
VyV.  We use the existing vtmp field in CPUHexagonState.

Test case added in tests/tcg/hexagon/hvx_misc.c

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220718230320.24444-2-tsimpson@quicinc.com>
2022-07-31 16:22:09 -07:00

584 lines
17 KiB
C

/*
* Copyright(c) 2021-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <limits.h>
int err;
static void __check(int line, int i, int j, uint64_t result, uint64_t expect)
{
if (result != expect) {
printf("ERROR at line %d: [%d][%d] 0x%016llx != 0x%016llx\n",
line, i, j, result, expect);
err++;
}
}
#define check(RES, EXP) __check(__LINE__, RES, EXP)
#define MAX_VEC_SIZE_BYTES 128
typedef union {
uint64_t ud[MAX_VEC_SIZE_BYTES / 8];
int64_t d[MAX_VEC_SIZE_BYTES / 8];
uint32_t uw[MAX_VEC_SIZE_BYTES / 4];
int32_t w[MAX_VEC_SIZE_BYTES / 4];
uint16_t uh[MAX_VEC_SIZE_BYTES / 2];
int16_t h[MAX_VEC_SIZE_BYTES / 2];
uint8_t ub[MAX_VEC_SIZE_BYTES / 1];
int8_t b[MAX_VEC_SIZE_BYTES / 1];
} MMVector;
#define BUFSIZE 16
#define OUTSIZE 16
#define MASKMOD 3
MMVector buffer0[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector buffer1[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector mask[BUFSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector output[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
MMVector expect[OUTSIZE] __attribute__((aligned(MAX_VEC_SIZE_BYTES)));
#define CHECK_OUTPUT_FUNC(FIELD, FIELDSZ) \
static void check_output_##FIELD(int line, size_t num_vectors) \
{ \
for (int i = 0; i < num_vectors; i++) { \
for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \
__check(line, i, j, output[i].FIELD[j], expect[i].FIELD[j]); \
} \
} \
}
CHECK_OUTPUT_FUNC(d, 8)
CHECK_OUTPUT_FUNC(w, 4)
CHECK_OUTPUT_FUNC(h, 2)
CHECK_OUTPUT_FUNC(b, 1)
static void init_buffers(void)
{
int counter0 = 0;
int counter1 = 17;
for (int i = 0; i < BUFSIZE; i++) {
for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) {
buffer0[i].b[j] = counter0++;
buffer1[i].b[j] = counter1++;
}
for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
mask[i].w[j] = (i + j % MASKMOD == 0) ? 0 : 1;
}
}
}
static void test_load_tmp(void)
{
void *p0 = buffer0;
void *p1 = buffer1;
void *pout = output;
for (int i = 0; i < BUFSIZE; i++) {
/*
* Load into v12 as .tmp, then use it in the next packet
* Should get the new value within the same packet and
* the old value in the next packet
*/
asm("v3 = vmem(%0 + #0)\n\t"
"r1 = #1\n\t"
"v12 = vsplat(r1)\n\t"
"{\n\t"
" v12.tmp = vmem(%1 + #0)\n\t"
" v4.w = vadd(v12.w, v3.w)\n\t"
"}\n\t"
"v4.w = vadd(v4.w, v12.w)\n\t"
"vmem(%2 + #0) = v4\n\t"
: : "r"(p0), "r"(p1), "r"(pout)
: "r1", "v12", "v3", "v4", "v6", "memory");
p0 += sizeof(MMVector);
p1 += sizeof(MMVector);
pout += sizeof(MMVector);
for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
expect[i].w[j] = buffer0[i].w[j] + buffer1[i].w[j] + 1;
}
}
check_output_w(__LINE__, BUFSIZE);
}
static void test_load_cur(void)
{
void *p0 = buffer0;
void *pout = output;
for (int i = 0; i < BUFSIZE; i++) {
asm("{\n\t"
" v2.cur = vmem(%0 + #0)\n\t"
" vmem(%1 + #0) = v2\n\t"
"}\n\t"
: : "r"(p0), "r"(pout) : "v2", "memory");
p0 += sizeof(MMVector);
pout += sizeof(MMVector);
for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
expect[i].uw[j] = buffer0[i].uw[j];
}
}
check_output_w(__LINE__, BUFSIZE);
}
static void test_load_aligned(void)
{
/* Aligned loads ignore the low bits of the address */
void *p0 = buffer0;
void *pout = output;
const size_t offset = 13;
p0 += offset; /* Create an unaligned address */
asm("v2 = vmem(%0 + #0)\n\t"
"vmem(%1 + #0) = v2\n\t"
: : "r"(p0), "r"(pout) : "v2", "memory");
expect[0] = buffer0[0];
check_output_w(__LINE__, 1);
}
static void test_load_unaligned(void)
{
void *p0 = buffer0;
void *pout = output;
const size_t offset = 12;
p0 += offset; /* Create an unaligned address */
asm("v2 = vmemu(%0 + #0)\n\t"
"vmem(%1 + #0) = v2\n\t"
: : "r"(p0), "r"(pout) : "v2", "memory");
memcpy(expect, &buffer0[0].ub[offset], sizeof(MMVector));
check_output_w(__LINE__, 1);
}
static void test_store_aligned(void)
{
/* Aligned stores ignore the low bits of the address */
void *p0 = buffer0;
void *pout = output;
const size_t offset = 13;
pout += offset; /* Create an unaligned address */
asm("v2 = vmem(%0 + #0)\n\t"
"vmem(%1 + #0) = v2\n\t"
: : "r"(p0), "r"(pout) : "v2", "memory");
expect[0] = buffer0[0];
check_output_w(__LINE__, 1);
}
static void test_store_unaligned(void)
{
void *p0 = buffer0;
void *pout = output;
const size_t offset = 12;
pout += offset; /* Create an unaligned address */
asm("v2 = vmem(%0 + #0)\n\t"
"vmemu(%1 + #0) = v2\n\t"
: : "r"(p0), "r"(pout) : "v2", "memory");
memcpy(expect, buffer0, 2 * sizeof(MMVector));
memcpy(&expect[0].ub[offset], buffer0, sizeof(MMVector));
check_output_w(__LINE__, 2);
}
static void test_masked_store(bool invert)
{
void *p0 = buffer0;
void *pmask = mask;
void *pout = output;
memset(expect, 0xff, sizeof(expect));
memset(output, 0xff, sizeof(expect));
for (int i = 0; i < BUFSIZE; i++) {
if (invert) {
asm("r4 = #0\n\t"
"v4 = vsplat(r4)\n\t"
"v5 = vmem(%0 + #0)\n\t"
"q0 = vcmp.eq(v4.w, v5.w)\n\t"
"v5 = vmem(%1)\n\t"
"if (!q0) vmem(%2) = v5\n\t" /* Inverted test */
: : "r"(pmask), "r"(p0), "r"(pout)
: "r4", "v4", "v5", "q0", "memory");
} else {
asm("r4 = #0\n\t"
"v4 = vsplat(r4)\n\t"
"v5 = vmem(%0 + #0)\n\t"
"q0 = vcmp.eq(v4.w, v5.w)\n\t"
"v5 = vmem(%1)\n\t"
"if (q0) vmem(%2) = v5\n\t" /* Non-inverted test */
: : "r"(pmask), "r"(p0), "r"(pout)
: "r4", "v4", "v5", "q0", "memory");
}
p0 += sizeof(MMVector);
pmask += sizeof(MMVector);
pout += sizeof(MMVector);
for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
if (invert) {
if (i + j % MASKMOD != 0) {
expect[i].w[j] = buffer0[i].w[j];
}
} else {
if (i + j % MASKMOD == 0) {
expect[i].w[j] = buffer0[i].w[j];
}
}
}
}
check_output_w(__LINE__, BUFSIZE);
}
static void test_new_value_store(void)
{
void *p0 = buffer0;
void *pout = output;
asm("{\n\t"
" v2 = vmem(%0 + #0)\n\t"
" vmem(%1 + #0) = v2.new\n\t"
"}\n\t"
: : "r"(p0), "r"(pout) : "v2", "memory");
expect[0] = buffer0[0];
check_output_w(__LINE__, 1);
}
static void test_max_temps()
{
void *p0 = buffer0;
void *pout = output;
asm("v0 = vmem(%0 + #0)\n\t"
"v1 = vmem(%0 + #1)\n\t"
"v2 = vmem(%0 + #2)\n\t"
"v3 = vmem(%0 + #3)\n\t"
"v4 = vmem(%0 + #4)\n\t"
"{\n\t"
" v1:0.w = vadd(v3:2.w, v1:0.w)\n\t"
" v2.b = vshuffe(v3.b, v2.b)\n\t"
" v3.w = vadd(v1.w, v4.w)\n\t"
" v4.tmp = vmem(%0 + #5)\n\t"
"}\n\t"
"vmem(%1 + #0) = v0\n\t"
"vmem(%1 + #1) = v1\n\t"
"vmem(%1 + #2) = v2\n\t"
"vmem(%1 + #3) = v3\n\t"
"vmem(%1 + #4) = v4\n\t"
: : "r"(p0), "r"(pout) : "memory");
/* The first two vectors come from the vadd-pair instruction */
for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
expect[0].w[i] = buffer0[0].w[i] + buffer0[2].w[i];
expect[1].w[i] = buffer0[1].w[i] + buffer0[3].w[i];
}
/* The third vector comes from the vshuffe instruction */
for (int i = 0; i < MAX_VEC_SIZE_BYTES / 2; i++) {
expect[2].uh[i] = (buffer0[2].uh[i] & 0xff) |
(buffer0[3].uh[i] & 0xff) << 8;
}
/* The fourth vector comes from the vadd-single instruction */
for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
expect[3].w[i] = buffer0[1].w[i] + buffer0[5].w[i];
}
/*
* The fifth vector comes from the load to v4
* make sure the .tmp is dropped
*/
expect[4] = buffer0[4];
check_output_b(__LINE__, 5);
}
#define VEC_OP1(ASM, EL, IN, OUT) \
asm("v2 = vmem(%0 + #0)\n\t" \
"v2" #EL " = " #ASM "(v2" #EL ")\n\t" \
"vmem(%1 + #0) = v2\n\t" \
: : "r"(IN), "r"(OUT) : "v2", "memory")
#define VEC_OP2(ASM, EL, IN0, IN1, OUT) \
asm("v2 = vmem(%0 + #0)\n\t" \
"v3 = vmem(%1 + #0)\n\t" \
"v2" #EL " = " #ASM "(v2" #EL ", v3" #EL ")\n\t" \
"vmem(%2 + #0) = v2\n\t" \
: : "r"(IN0), "r"(IN1), "r"(OUT) : "v2", "v3", "memory")
#define TEST_VEC_OP1(NAME, ASM, EL, FIELD, FIELDSZ, OP) \
static void test_##NAME(void) \
{ \
void *pin = buffer0; \
void *pout = output; \
for (int i = 0; i < BUFSIZE; i++) { \
VEC_OP1(ASM, EL, pin, pout); \
pin += sizeof(MMVector); \
pout += sizeof(MMVector); \
} \
for (int i = 0; i < BUFSIZE; i++) { \
for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \
expect[i].FIELD[j] = OP buffer0[i].FIELD[j]; \
} \
} \
check_output_##FIELD(__LINE__, BUFSIZE); \
}
#define TEST_VEC_OP2(NAME, ASM, EL, FIELD, FIELDSZ, OP) \
static void test_##NAME(void) \
{ \
void *p0 = buffer0; \
void *p1 = buffer1; \
void *pout = output; \
for (int i = 0; i < BUFSIZE; i++) { \
VEC_OP2(ASM, EL, p0, p1, pout); \
p0 += sizeof(MMVector); \
p1 += sizeof(MMVector); \
pout += sizeof(MMVector); \
} \
for (int i = 0; i < BUFSIZE; i++) { \
for (int j = 0; j < MAX_VEC_SIZE_BYTES / FIELDSZ; j++) { \
expect[i].FIELD[j] = buffer0[i].FIELD[j] OP buffer1[i].FIELD[j]; \
} \
} \
check_output_##FIELD(__LINE__, BUFSIZE); \
}
#define THRESHOLD 31
#define PRED_OP2(ASM, IN0, IN1, OUT, INV) \
asm("r4 = #%3\n\t" \
"v1.b = vsplat(r4)\n\t" \
"v2 = vmem(%0 + #0)\n\t" \
"q0 = vcmp.gt(v2.b, v1.b)\n\t" \
"v3 = vmem(%1 + #0)\n\t" \
"q1 = vcmp.gt(v3.b, v1.b)\n\t" \
"q2 = " #ASM "(q0, " INV "q1)\n\t" \
"r4 = #0xff\n\t" \
"v1.b = vsplat(r4)\n\t" \
"if (q2) vmem(%2 + #0) = v1\n\t" \
: : "r"(IN0), "r"(IN1), "r"(OUT), "i"(THRESHOLD) \
: "r4", "v1", "v2", "v3", "q0", "q1", "q2", "memory")
#define TEST_PRED_OP2(NAME, ASM, OP, INV) \
static void test_##NAME(bool invert) \
{ \
void *p0 = buffer0; \
void *p1 = buffer1; \
void *pout = output; \
memset(output, 0, sizeof(expect)); \
for (int i = 0; i < BUFSIZE; i++) { \
PRED_OP2(ASM, p0, p1, pout, INV); \
p0 += sizeof(MMVector); \
p1 += sizeof(MMVector); \
pout += sizeof(MMVector); \
} \
for (int i = 0; i < BUFSIZE; i++) { \
for (int j = 0; j < MAX_VEC_SIZE_BYTES; j++) { \
bool p0 = (buffer0[i].b[j] > THRESHOLD); \
bool p1 = (buffer1[i].b[j] > THRESHOLD); \
if (invert) { \
expect[i].b[j] = (p0 OP !p1) ? 0xff : 0x00; \
} else { \
expect[i].b[j] = (p0 OP p1) ? 0xff : 0x00; \
} \
} \
} \
check_output_b(__LINE__, BUFSIZE); \
}
TEST_VEC_OP2(vadd_w, vadd, .w, w, 4, +)
TEST_VEC_OP2(vadd_h, vadd, .h, h, 2, +)
TEST_VEC_OP2(vadd_b, vadd, .b, b, 1, +)
TEST_VEC_OP2(vsub_w, vsub, .w, w, 4, -)
TEST_VEC_OP2(vsub_h, vsub, .h, h, 2, -)
TEST_VEC_OP2(vsub_b, vsub, .b, b, 1, -)
TEST_VEC_OP2(vxor, vxor, , d, 8, ^)
TEST_VEC_OP2(vand, vand, , d, 8, &)
TEST_VEC_OP2(vor, vor, , d, 8, |)
TEST_VEC_OP1(vnot, vnot, , d, 8, ~)
TEST_PRED_OP2(pred_or, or, |, "")
TEST_PRED_OP2(pred_or_n, or, |, "!")
TEST_PRED_OP2(pred_and, and, &, "")
TEST_PRED_OP2(pred_and_n, and, &, "!")
TEST_PRED_OP2(pred_xor, xor, ^, "")
static void test_vadduwsat(void)
{
/*
* Test for saturation by adding two numbers that add to more than UINT_MAX
* and make sure the result saturates to UINT_MAX
*/
const uint32_t x = 0xffff0000;
const uint32_t y = 0x000fffff;
memset(expect, 0x12, sizeof(MMVector));
memset(output, 0x34, sizeof(MMVector));
asm volatile ("v10 = vsplat(%0)\n\t"
"v11 = vsplat(%1)\n\t"
"v21.uw = vadd(v11.uw, v10.uw):sat\n\t"
"vmem(%2+#0) = v21\n\t"
: /* no outputs */
: "r"(x), "r"(y), "r"(output)
: "v10", "v11", "v21", "memory");
for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
expect[0].uw[j] = UINT_MAX;
}
check_output_w(__LINE__, 1);
}
static void test_vsubuwsat_dv(void)
{
/*
* Test for saturation by subtracting two numbers where the result is
* negative and make sure the result saturates to zero
*
* vsubuwsat_dv operates on an HVX register pair, so we'll have a
* pair of subtractions
* w - x < 0
* y - z < 0
*/
const uint32_t w = 0x000000b7;
const uint32_t x = 0xffffff4e;
const uint32_t y = 0x31fe88e7;
const uint32_t z = 0x7fffff79;
memset(expect, 0x12, sizeof(MMVector) * 2);
memset(output, 0x34, sizeof(MMVector) * 2);
asm volatile ("v16 = vsplat(%0)\n\t"
"v17 = vsplat(%1)\n\t"
"v26 = vsplat(%2)\n\t"
"v27 = vsplat(%3)\n\t"
"v25:24.uw = vsub(v17:16.uw, v27:26.uw):sat\n\t"
"vmem(%4+#0) = v24\n\t"
"vmem(%4+#1) = v25\n\t"
: /* no outputs */
: "r"(w), "r"(y), "r"(x), "r"(z), "r"(output)
: "v16", "v17", "v24", "v25", "v26", "v27", "memory");
for (int j = 0; j < MAX_VEC_SIZE_BYTES / 4; j++) {
expect[0].uw[j] = 0x00000000;
expect[1].uw[j] = 0x00000000;
}
check_output_w(__LINE__, 2);
}
static void test_vshuff(void)
{
/* Test that vshuff works when the two operands are the same register */
const uint32_t splat = 0x089be55c;
const uint32_t shuff = 0x454fa926;
MMVector v0, v1;
memset(expect, 0x12, sizeof(MMVector));
memset(output, 0x34, sizeof(MMVector));
asm volatile("v25 = vsplat(%0)\n\t"
"vshuff(v25, v25, %1)\n\t"
"vmem(%2 + #0) = v25\n\t"
: /* no outputs */
: "r"(splat), "r"(shuff), "r"(output)
: "v25", "memory");
/*
* The semantics of Hexagon are the operands are pass-by-value, so create
* two copies of the vsplat result.
*/
for (int i = 0; i < MAX_VEC_SIZE_BYTES / 4; i++) {
v0.uw[i] = splat;
v1.uw[i] = splat;
}
/* Do the vshuff operation */
for (int offset = 1; offset < MAX_VEC_SIZE_BYTES; offset <<= 1) {
if (shuff & offset) {
for (int k = 0; k < MAX_VEC_SIZE_BYTES; k++) {
if (!(k & offset)) {
uint8_t tmp = v0.ub[k];
v0.ub[k] = v1.ub[k + offset];
v1.ub[k + offset] = tmp;
}
}
}
}
/* Put the result in the expect buffer for verification */
expect[0] = v1;
check_output_b(__LINE__, 1);
}
int main()
{
init_buffers();
test_load_tmp();
test_load_cur();
test_load_aligned();
test_load_unaligned();
test_store_aligned();
test_store_unaligned();
test_masked_store(false);
test_masked_store(true);
test_new_value_store();
test_max_temps();
test_vadd_w();
test_vadd_h();
test_vadd_b();
test_vsub_w();
test_vsub_h();
test_vsub_b();
test_vxor();
test_vand();
test_vor();
test_vnot();
test_pred_or(false);
test_pred_or_n(true);
test_pred_and(false);
test_pred_and_n(true);
test_pred_xor(false);
test_vadduwsat();
test_vsubuwsat_dv();
test_vshuff();
puts(err ? "FAIL" : "PASS");
return err ? 1 : 0;
}