target/arm: Use uint32_t instead of bitmap for sve vq's

The bitmap need only hold 16 bits; bitmap is over-complicated.
We can simplify operations quite a bit with plain logical ops.

The introduction of SVE_VQ_POW2_MAP eliminates the need for
looping in order to search for powers of two.  Simply perform
the logical ops and use count leading or trailing zeros as
required to find the result.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20220607203306.657998-12-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Richard Henderson 2022-06-08 19:38:57 +01:00 committed by Peter Maydell
parent 9b5f422559
commit 886902ece7
6 changed files with 75 additions and 105 deletions

View File

@ -1041,9 +1041,9 @@ struct ArchCPU {
* Bits set in sve_vq_supported represent valid vector lengths for * Bits set in sve_vq_supported represent valid vector lengths for
* the CPU type. * the CPU type.
*/ */
DECLARE_BITMAP(sve_vq_map, ARM_MAX_VQ); uint32_t sve_vq_map;
DECLARE_BITMAP(sve_vq_init, ARM_MAX_VQ); uint32_t sve_vq_init;
DECLARE_BITMAP(sve_vq_supported, ARM_MAX_VQ); uint32_t sve_vq_supported;
/* Generic timer counter frequency, in Hz */ /* Generic timer counter frequency, in Hz */
uint64_t gt_cntfrq_hz; uint64_t gt_cntfrq_hz;

View File

@ -355,8 +355,11 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
* any of the above. Finally, if SVE is not disabled, then at least one * any of the above. Finally, if SVE is not disabled, then at least one
* vector length must be enabled. * vector length must be enabled.
*/ */
DECLARE_BITMAP(tmp, ARM_MAX_VQ); uint32_t vq_map = cpu->sve_vq_map;
uint32_t vq, max_vq = 0; uint32_t vq_init = cpu->sve_vq_init;
uint32_t vq_supported;
uint32_t vq_mask = 0;
uint32_t tmp, vq, max_vq = 0;
/* /*
* CPU models specify a set of supported vector lengths which are * CPU models specify a set of supported vector lengths which are
@ -364,10 +367,16 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
* in the supported bitmap results in an error. When KVM is enabled we * in the supported bitmap results in an error. When KVM is enabled we
* fetch the supported bitmap from the host. * fetch the supported bitmap from the host.
*/ */
if (kvm_enabled() && kvm_arm_sve_supported()) { if (kvm_enabled()) {
kvm_arm_sve_get_vls(CPU(cpu), cpu->sve_vq_supported); if (kvm_arm_sve_supported()) {
} else if (kvm_enabled()) { cpu->sve_vq_supported = kvm_arm_sve_get_vls(CPU(cpu));
assert(!cpu_isar_feature(aa64_sve, cpu)); vq_supported = cpu->sve_vq_supported;
} else {
assert(!cpu_isar_feature(aa64_sve, cpu));
vq_supported = 0;
}
} else {
vq_supported = cpu->sve_vq_supported;
} }
/* /*
@ -375,8 +384,9 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
* From the properties, sve_vq_map<N> implies sve_vq_init<N>. * From the properties, sve_vq_map<N> implies sve_vq_init<N>.
* Check first for any sve<N> enabled. * Check first for any sve<N> enabled.
*/ */
if (!bitmap_empty(cpu->sve_vq_map, ARM_MAX_VQ)) { if (vq_map != 0) {
max_vq = find_last_bit(cpu->sve_vq_map, ARM_MAX_VQ) + 1; max_vq = 32 - clz32(vq_map);
vq_mask = MAKE_64BIT_MASK(0, max_vq);
if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) { if (cpu->sve_max_vq && max_vq > cpu->sve_max_vq) {
error_setg(errp, "cannot enable sve%d", max_vq * 128); error_setg(errp, "cannot enable sve%d", max_vq * 128);
@ -392,15 +402,10 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
* For KVM we have to automatically enable all supported uninitialized * For KVM we have to automatically enable all supported uninitialized
* lengths, even when the smaller lengths are not all powers-of-two. * lengths, even when the smaller lengths are not all powers-of-two.
*/ */
bitmap_andnot(tmp, cpu->sve_vq_supported, cpu->sve_vq_init, max_vq); vq_map |= vq_supported & ~vq_init & vq_mask;
bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
} else { } else {
/* Propagate enabled bits down through required powers-of-two. */ /* Propagate enabled bits down through required powers-of-two. */
for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) { vq_map |= SVE_VQ_POW2_MAP & ~vq_init & vq_mask;
if (!test_bit(vq - 1, cpu->sve_vq_init)) {
set_bit(vq - 1, cpu->sve_vq_map);
}
}
} }
} else if (cpu->sve_max_vq == 0) { } else if (cpu->sve_max_vq == 0) {
/* /*
@ -413,25 +418,18 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
if (kvm_enabled()) { if (kvm_enabled()) {
/* Disabling a supported length disables all larger lengths. */ /* Disabling a supported length disables all larger lengths. */
for (vq = 1; vq <= ARM_MAX_VQ; ++vq) { tmp = vq_init & vq_supported;
if (test_bit(vq - 1, cpu->sve_vq_init) &&
test_bit(vq - 1, cpu->sve_vq_supported)) {
break;
}
}
} else { } else {
/* Disabling a power-of-two disables all larger lengths. */ /* Disabling a power-of-two disables all larger lengths. */
for (vq = 1; vq <= ARM_MAX_VQ; vq <<= 1) { tmp = vq_init & SVE_VQ_POW2_MAP;
if (test_bit(vq - 1, cpu->sve_vq_init)) {
break;
}
}
} }
vq = ctz32(tmp) + 1;
max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ; max_vq = vq <= ARM_MAX_VQ ? vq - 1 : ARM_MAX_VQ;
bitmap_andnot(cpu->sve_vq_map, cpu->sve_vq_supported, vq_mask = MAKE_64BIT_MASK(0, max_vq);
cpu->sve_vq_init, max_vq); vq_map = vq_supported & ~vq_init & vq_mask;
if (max_vq == 0 || bitmap_empty(cpu->sve_vq_map, max_vq)) {
if (max_vq == 0 || vq_map == 0) {
error_setg(errp, "cannot disable sve%d", vq * 128); error_setg(errp, "cannot disable sve%d", vq * 128);
error_append_hint(errp, "Disabling sve%d results in all " error_append_hint(errp, "Disabling sve%d results in all "
"vector lengths being disabled.\n", "vector lengths being disabled.\n",
@ -441,7 +439,8 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
return; return;
} }
max_vq = find_last_bit(cpu->sve_vq_map, max_vq) + 1; max_vq = 32 - clz32(vq_map);
vq_mask = MAKE_64BIT_MASK(0, max_vq);
} }
/* /*
@ -451,9 +450,9 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
*/ */
if (cpu->sve_max_vq != 0) { if (cpu->sve_max_vq != 0) {
max_vq = cpu->sve_max_vq; max_vq = cpu->sve_max_vq;
vq_mask = MAKE_64BIT_MASK(0, max_vq);
if (!test_bit(max_vq - 1, cpu->sve_vq_map) && if (vq_init & ~vq_map & (1 << (max_vq - 1))) {
test_bit(max_vq - 1, cpu->sve_vq_init)) {
error_setg(errp, "cannot disable sve%d", max_vq * 128); error_setg(errp, "cannot disable sve%d", max_vq * 128);
error_append_hint(errp, "The maximum vector length must be " error_append_hint(errp, "The maximum vector length must be "
"enabled, sve-max-vq=%d (%d bits)\n", "enabled, sve-max-vq=%d (%d bits)\n",
@ -462,8 +461,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
} }
/* Set all bits not explicitly set within sve-max-vq. */ /* Set all bits not explicitly set within sve-max-vq. */
bitmap_complement(tmp, cpu->sve_vq_init, max_vq); vq_map |= ~vq_init & vq_mask;
bitmap_or(cpu->sve_vq_map, cpu->sve_vq_map, tmp, max_vq);
} }
/* /*
@ -472,13 +470,14 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
* are clear, just in case anybody looks. * are clear, just in case anybody looks.
*/ */
assert(max_vq != 0); assert(max_vq != 0);
bitmap_clear(cpu->sve_vq_map, max_vq, ARM_MAX_VQ - max_vq); assert(vq_mask != 0);
vq_map &= vq_mask;
/* Ensure the set of lengths matches what is supported. */ /* Ensure the set of lengths matches what is supported. */
bitmap_xor(tmp, cpu->sve_vq_map, cpu->sve_vq_supported, max_vq); tmp = vq_map ^ (vq_supported & vq_mask);
if (!bitmap_empty(tmp, max_vq)) { if (tmp) {
vq = find_last_bit(tmp, max_vq) + 1; vq = 32 - clz32(tmp);
if (test_bit(vq - 1, cpu->sve_vq_map)) { if (vq_map & (1 << (vq - 1))) {
if (cpu->sve_max_vq) { if (cpu->sve_max_vq) {
error_setg(errp, "cannot set sve-max-vq=%d", cpu->sve_max_vq); error_setg(errp, "cannot set sve-max-vq=%d", cpu->sve_max_vq);
error_append_hint(errp, "This CPU does not support " error_append_hint(errp, "This CPU does not support "
@ -502,15 +501,15 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
return; return;
} else { } else {
/* Ensure all required powers-of-two are enabled. */ /* Ensure all required powers-of-two are enabled. */
for (vq = pow2floor(max_vq); vq >= 1; vq >>= 1) { tmp = SVE_VQ_POW2_MAP & vq_mask & ~vq_map;
if (!test_bit(vq - 1, cpu->sve_vq_map)) { if (tmp) {
error_setg(errp, "cannot disable sve%d", vq * 128); vq = 32 - clz32(tmp);
error_append_hint(errp, "sve%d is required as it " error_setg(errp, "cannot disable sve%d", vq * 128);
"is a power-of-two length smaller " error_append_hint(errp, "sve%d is required as it "
"than the maximum, sve%d\n", "is a power-of-two length smaller "
vq * 128, max_vq * 128); "than the maximum, sve%d\n",
return; vq * 128, max_vq * 128);
} return;
} }
} }
} }
@ -530,6 +529,7 @@ void arm_cpu_sve_finalize(ARMCPU *cpu, Error **errp)
/* From now on sve_max_vq is the actual maximum supported length. */ /* From now on sve_max_vq is the actual maximum supported length. */
cpu->sve_max_vq = max_vq; cpu->sve_max_vq = max_vq;
cpu->sve_vq_map = vq_map;
} }
static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name, static void cpu_max_get_sve_max_vq(Object *obj, Visitor *v, const char *name,
@ -590,7 +590,7 @@ static void cpu_arm_get_sve_vq(Object *obj, Visitor *v, const char *name,
if (!cpu_isar_feature(aa64_sve, cpu)) { if (!cpu_isar_feature(aa64_sve, cpu)) {
value = false; value = false;
} else { } else {
value = test_bit(vq - 1, cpu->sve_vq_map); value = extract32(cpu->sve_vq_map, vq - 1, 1);
} }
visit_type_bool(v, name, &value, errp); visit_type_bool(v, name, &value, errp);
} }
@ -612,12 +612,8 @@ static void cpu_arm_set_sve_vq(Object *obj, Visitor *v, const char *name,
return; return;
} }
if (value) { cpu->sve_vq_map = deposit32(cpu->sve_vq_map, vq - 1, 1, value);
set_bit(vq - 1, cpu->sve_vq_map); cpu->sve_vq_init |= 1 << (vq - 1);
} else {
clear_bit(vq - 1, cpu->sve_vq_map);
}
set_bit(vq - 1, cpu->sve_vq_init);
} }
static bool cpu_arm_get_sve(Object *obj, Error **errp) static bool cpu_arm_get_sve(Object *obj, Error **errp)
@ -979,7 +975,7 @@ static void aarch64_max_initfn(Object *obj)
cpu->dcz_blocksize = 7; /* 512 bytes */ cpu->dcz_blocksize = 7; /* 512 bytes */
#endif #endif
bitmap_fill(cpu->sve_vq_supported, ARM_MAX_VQ); cpu->sve_vq_supported = MAKE_64BIT_MASK(0, ARM_MAX_VQ);
aarch64_add_pauth_properties(obj); aarch64_add_pauth_properties(obj);
aarch64_add_sve_properties(obj); aarch64_add_sve_properties(obj);
@ -1026,12 +1022,11 @@ static void aarch64_a64fx_initfn(Object *obj)
cpu->gic_vprebits = 5; cpu->gic_vprebits = 5;
cpu->gic_pribits = 5; cpu->gic_pribits = 5;
/* Suppport of A64FX's vector length are 128,256 and 512bit only */ /* The A64FX supports only 128, 256 and 512 bit vector lengths */
aarch64_add_sve_properties(obj); aarch64_add_sve_properties(obj);
bitmap_zero(cpu->sve_vq_supported, ARM_MAX_VQ); cpu->sve_vq_supported = (1 << 0) /* 128bit */
set_bit(0, cpu->sve_vq_supported); /* 128bit */ | (1 << 1) /* 256bit */
set_bit(1, cpu->sve_vq_supported); /* 256bit */ | (1 << 3); /* 512bit */
set_bit(3, cpu->sve_vq_supported); /* 512bit */
cpu->isar.reset_pmcr_el0 = 0x46014040; cpu->isar.reset_pmcr_el0 = 0x46014040;

View File

@ -6219,7 +6219,6 @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
{ {
ARMCPU *cpu = env_archcpu(env); ARMCPU *cpu = env_archcpu(env);
uint32_t len = cpu->sve_max_vq - 1; uint32_t len = cpu->sve_max_vq - 1;
uint32_t end_len;
if (el <= 1 && !el_is_in_host(env, el)) { if (el <= 1 && !el_is_in_host(env, el)) {
len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[1]); len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[1]);
@ -6231,12 +6230,8 @@ uint32_t sve_zcr_len_for_el(CPUARMState *env, int el)
len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]); len = MIN(len, 0xf & (uint32_t)env->vfp.zcr_el[3]);
} }
end_len = len; len = 31 - clz32(cpu->sve_vq_map & MAKE_64BIT_MASK(0, len + 1));
if (!test_bit(len, cpu->sve_vq_map)) { return len;
end_len = find_last_bit(cpu->sve_vq_map, len);
assert(end_len < len);
}
return end_len;
} }
static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri, static void zcr_write(CPUARMState *env, const ARMCPRegInfo *ri,

View File

@ -1340,4 +1340,9 @@ bool el_is_in_host(CPUARMState *env, int el);
void aa32_max_features(ARMCPU *cpu); void aa32_max_features(ARMCPU *cpu);
/*
 * Powers of 2 for sve_vq_map et al.
 *
 * In these maps a vector length of vq quadwords (vq * 128 bits) is
 * represented by bit (vq - 1), so this mask has the bits for
 * vq = 1, 2, 4, 8 and 16 set.
 */
#define SVE_VQ_POW2_MAP \
((1 << (1 - 1)) | (1 << (2 - 1)) | \
(1 << (4 - 1)) | (1 << (8 - 1)) | (1 << (16 - 1)))
#endif #endif

View File

@ -760,15 +760,13 @@ bool kvm_arm_steal_time_supported(void)
QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1); QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);
void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) uint32_t kvm_arm_sve_get_vls(CPUState *cs)
{ {
/* Only call this function if kvm_arm_sve_supported() returns true. */ /* Only call this function if kvm_arm_sve_supported() returns true. */
static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS]; static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
static bool probed; static bool probed;
uint32_t vq = 0; uint32_t vq = 0;
int i, j; int i;
bitmap_zero(map, ARM_MAX_VQ);
/* /*
* KVM ensures all host CPUs support the same set of vector lengths. * KVM ensures all host CPUs support the same set of vector lengths.
@ -809,46 +807,24 @@ void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
if (vq > ARM_MAX_VQ) { if (vq > ARM_MAX_VQ) {
warn_report("KVM supports vector lengths larger than " warn_report("KVM supports vector lengths larger than "
"QEMU can enable"); "QEMU can enable");
vls[0] &= MAKE_64BIT_MASK(0, ARM_MAX_VQ);
} }
} }
for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) { return vls[0];
if (!vls[i]) {
continue;
}
for (j = 1; j <= 64; ++j) {
vq = j + i * 64;
if (vq > ARM_MAX_VQ) {
return;
}
if (vls[i] & (1UL << (j - 1))) {
set_bit(vq - 1, map);
}
}
}
} }
static int kvm_arm_sve_set_vls(CPUState *cs) static int kvm_arm_sve_set_vls(CPUState *cs)
{ {
uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0}; ARMCPU *cpu = ARM_CPU(cs);
uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = { cpu->sve_vq_map };
struct kvm_one_reg reg = { struct kvm_one_reg reg = {
.id = KVM_REG_ARM64_SVE_VLS, .id = KVM_REG_ARM64_SVE_VLS,
.addr = (uint64_t)&vls[0], .addr = (uint64_t)&vls[0],
}; };
ARMCPU *cpu = ARM_CPU(cs);
uint32_t vq;
int i, j;
assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX); assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);
for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
if (test_bit(vq - 1, cpu->sve_vq_map)) {
i = (vq - 1) / 64;
j = (vq - 1) % 64;
vls[i] |= 1UL << j;
}
}
return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg); return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
} }

View File

@ -239,13 +239,12 @@ bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf);
/** /**
* kvm_arm_sve_get_vls: * kvm_arm_sve_get_vls:
* @cs: CPUState * @cs: CPUState
* @map: bitmap to fill in
* *
* Get all the SVE vector lengths supported by the KVM host, setting * Get all the SVE vector lengths supported by the KVM host, setting
* the bits corresponding to their length in quadwords minus one * the bits corresponding to their length in quadwords minus one
* (vq - 1) in @map up to ARM_MAX_VQ. * (vq - 1) up to ARM_MAX_VQ. Return the resulting map.
*/ */
void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map); uint32_t kvm_arm_sve_get_vls(CPUState *cs);
/** /**
* kvm_arm_set_cpu_features_from_host: * kvm_arm_set_cpu_features_from_host:
@ -439,7 +438,7 @@ static inline void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
g_assert_not_reached(); g_assert_not_reached();
} }
static inline void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map) static inline uint32_t kvm_arm_sve_get_vls(CPUState *cs)
{ {
g_assert_not_reached(); g_assert_not_reached();
} }