From 8a132968b2ce3f7078c8643dbc00e5c642eef0f6 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:52 +0000 Subject: [PATCH 01/31] softfloat: Allow 2-operand NaN propagation rule to be set at runtime IEEE 758 does not define a fixed rule for which NaN to pick as the result if both operands of a 2-operand operation are NaNs. As a result different architectures have ended up with different rules for propagating NaNs. QEMU currently hardcodes the NaN propagation logic into the binary because pickNaN() has an ifdef ladder for different targets. We want to make the propagation rule instead be selectable at runtime, because: * this will let us have multiple targets in one QEMU binary * the Arm FEAT_AFP architectural feature includes letting the guest select a NaN propagation rule at runtime * x86 specifies different propagation rules for x87 FPU ops and for SSE ops, and specifying the rule in the float_status would let us emulate this, instead of wrongly using the x87 rules everywhere In this commit we add an enum for the propagation rule, the field in float_status, and the corresponding getters and setters. We change pickNaN to honour this, but because all targets still leave this field at its default 0 value, the fallback logic will pick the rule type with the old ifdef ladder. It's valid not to set a propagation rule if default_nan_mode is enabled, because in that case there's no need to pick a NaN; all the callers of pickNaN() catch this case and skip calling it. So we can already assert that we don't get into the "no rule defined" codepath for our four targets which always set default_nan_mode: Hexagon, RiscV, SH4 and Tricore, and for the one target which does not have FP at all: avr. These targets will not need to be updated to call set_float_2nan_prop_rule(). Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-2-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 229 ++++++++++++++++++-------------- include/fpu/softfloat-helpers.h | 11 ++ include/fpu/softfloat-types.h | 42 ++++++ 3 files changed, 185 insertions(+), 97 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 4e279b9bc4..fae6794a15 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -390,118 +390,153 @@ bool float32_is_signaling_nan(float32 a_, float_status *status) static int pickNaN(FloatClass a_cls, FloatClass b_cls, bool aIsLargerSignificand, float_status *status) { -#if defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ - defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) - /* ARM mandated NaN propagation rules (see FPProcessNaNs()), take - * the first of: - * 1. A if it is signaling - * 2. B if it is signaling - * 3. A (quiet) - * 4. B (quiet) - * A signaling NaN is always quietened before returning it. - */ - /* According to MIPS specifications, if one of the two operands is - * a sNaN, a new qNaN has to be generated. This is done in - * floatXX_silence_nan(). For qNaN inputs the specifications - * says: "When possible, this QNaN result is one of the operand QNaN - * values." In practice it seems that most implementations choose - * the first operand if both operands are qNaN. In short this gives - * the following rules: - * 1. A if it is signaling - * 2. B if it is signaling - * 3. A (quiet) - * 4. B (quiet) - * A signaling NaN is always silenced before returning it. - */ - if (is_snan(a_cls)) { - return 0; - } else if (is_snan(b_cls)) { - return 1; - } else if (is_qnan(a_cls)) { - return 0; - } else { - return 1; - } -#elif defined(TARGET_PPC) || defined(TARGET_M68K) - /* PowerPC propagation rules: - * 1. A if it sNaN or qNaN - * 2. B if it sNaN or qNaN - * A signaling NaN is always silenced before returning it. - */ - /* M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL - * 3.4 FLOATING-POINT INSTRUCTION DETAILS - * If either operand, but not both operands, of an operation is a - * nonsignaling NaN, then that NaN is returned as the result. If both - * operands are nonsignaling NaNs, then the destination operand - * nonsignaling NaN is returned as the result. - * If either operand to an operation is a signaling NaN (SNaN), then the - * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit - * is set in the FPCR ENABLE byte, then the exception is taken and the - * destination is not modified. If the SNaN exception enable bit is not - * set, setting the SNaN bit in the operand to a one converts the SNaN to - * a nonsignaling NaN. The operation then continues as described in the - * preceding paragraph for nonsignaling NaNs. - */ - if (is_nan(a_cls)) { - return 0; - } else { - return 1; - } -#elif defined(TARGET_SPARC) - /* Prefer SNaN over QNaN, order B then A. */ - if (is_snan(b_cls)) { - return 1; - } else if (is_snan(a_cls)) { - return 0; - } else if (is_qnan(b_cls)) { - return 1; - } else { - return 0; - } -#elif defined(TARGET_XTENSA) + Float2NaNPropRule rule = status->float_2nan_prop_rule; + /* - * Xtensa has two NaN propagation modes. - * Which one is active is controlled by float_status::use_first_nan. + * We guarantee not to require the target to tell us how to + * pick a NaN if we're always returning the default NaN. */ - if (status->use_first_nan) { + assert(!status->default_nan_mode); + + if (rule == float_2nan_prop_none) { + /* target didn't set the rule: fall back to old ifdef choices */ +#if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ + || defined(TARGET_RISCV) || defined(TARGET_SH4) \ + || defined(TARGET_TRICORE) + g_assert_not_reached(); +#elif defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ + defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) + /* + * ARM mandated NaN propagation rules (see FPProcessNaNs()), take + * the first of: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always quietened before returning it. + */ + /* + * According to MIPS specifications, if one of the two operands is + * a sNaN, a new qNaN has to be generated. This is done in + * floatXX_silence_nan(). For qNaN inputs the specifications + * says: "When possible, this QNaN result is one of the operand QNaN + * values." In practice it seems that most implementations choose + * the first operand if both operands are qNaN. In short this gives + * the following rules: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always silenced before returning it. + */ + rule = float_2nan_prop_s_ab; +#elif defined(TARGET_PPC) || defined(TARGET_M68K) + /* + * PowerPC propagation rules: + * 1. A if it sNaN or qNaN + * 2. B if it sNaN or qNaN + * A signaling NaN is always silenced before returning it. + */ + /* + * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL + * 3.4 FLOATING-POINT INSTRUCTION DETAILS + * If either operand, but not both operands, of an operation is a + * nonsignaling NaN, then that NaN is returned as the result. If both + * operands are nonsignaling NaNs, then the destination operand + * nonsignaling NaN is returned as the result. + * If either operand to an operation is a signaling NaN (SNaN), then the + * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit + * is set in the FPCR ENABLE byte, then the exception is taken and the + * destination is not modified. If the SNaN exception enable bit is not + * set, setting the SNaN bit in the operand to a one converts the SNaN to + * a nonsignaling NaN. The operation then continues as described in the + * preceding paragraph for nonsignaling NaNs. + */ + rule = float_2nan_prop_ab; +#elif defined(TARGET_SPARC) + /* Prefer SNaN over QNaN, order B then A. */ + rule = float_2nan_prop_s_ba; +#elif defined(TARGET_XTENSA) + /* + * Xtensa has two NaN propagation modes. + * Which one is active is controlled by float_status::use_first_nan. + */ + if (status->use_first_nan) { + rule = float_2nan_prop_ab; + } else { + rule = float_2nan_prop_ba; + } +#else + rule = float_2nan_prop_x87; +#endif + } + + switch (rule) { + case float_2nan_prop_s_ab: + if (is_snan(a_cls)) { + return 0; + } else if (is_snan(b_cls)) { + return 1; + } else if (is_qnan(a_cls)) { + return 0; + } else { + return 1; + } + break; + case float_2nan_prop_s_ba: + if (is_snan(b_cls)) { + return 1; + } else if (is_snan(a_cls)) { + return 0; + } else if (is_qnan(b_cls)) { + return 1; + } else { + return 0; + } + break; + case float_2nan_prop_ab: if (is_nan(a_cls)) { return 0; } else { return 1; } - } else { + break; + case float_2nan_prop_ba: if (is_nan(b_cls)) { return 1; } else { return 0; } - } -#else - /* This implements x87 NaN propagation rules: - * SNaN + QNaN => return the QNaN - * two SNaNs => return the one with the larger significand, silenced - * two QNaNs => return the one with the larger significand - * SNaN and a non-NaN => return the SNaN, silenced - * QNaN and a non-NaN => return the QNaN - * - * If we get down to comparing significands and they are the same, - * return the NaN with the positive sign bit (if any). - */ - if (is_snan(a_cls)) { - if (is_snan(b_cls)) { - return aIsLargerSignificand ? 0 : 1; - } - return is_qnan(b_cls) ? 1 : 0; - } else if (is_qnan(a_cls)) { - if (is_snan(b_cls) || !is_qnan(b_cls)) { - return 0; + break; + case float_2nan_prop_x87: + /* + * This implements x87 NaN propagation rules: + * SNaN + QNaN => return the QNaN + * two SNaNs => return the one with the larger significand, silenced + * two QNaNs => return the one with the larger significand + * SNaN and a non-NaN => return the SNaN, silenced + * QNaN and a non-NaN => return the QNaN + * + * If we get down to comparing significands and they are the same, + * return the NaN with the positive sign bit (if any). + */ + if (is_snan(a_cls)) { + if (is_snan(b_cls)) { + return aIsLargerSignificand ? 0 : 1; + } + return is_qnan(b_cls) ? 1 : 0; + } else if (is_qnan(a_cls)) { + if (is_snan(b_cls) || !is_qnan(b_cls)) { + return 0; + } else { + return aIsLargerSignificand ? 0 : 1; + } } else { - return aIsLargerSignificand ? 0 : 1; + return 1; } - } else { - return 1; + default: + g_assert_not_reached(); } -#endif } /*---------------------------------------------------------------------------- diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h index 94cbe073ec..453188de70 100644 --- a/include/fpu/softfloat-helpers.h +++ b/include/fpu/softfloat-helpers.h @@ -75,6 +75,12 @@ static inline void set_floatx80_rounding_precision(FloatX80RoundPrec val, status->floatx80_rounding_precision = val; } +static inline void set_float_2nan_prop_rule(Float2NaNPropRule rule, + float_status *status) +{ + status->float_2nan_prop_rule = rule; +} + static inline void set_flush_to_zero(bool val, float_status *status) { status->flush_to_zero = val; @@ -126,6 +132,11 @@ get_floatx80_rounding_precision(float_status *status) return status->floatx80_rounding_precision; } +static inline Float2NaNPropRule get_float_2nan_prop_rule(float_status *status) +{ + return status->float_2nan_prop_rule; +} + static inline bool get_flush_to_zero(float_status *status) { return status->flush_to_zero; diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 0884ec4ef7..5cd5a0d0ae 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -170,6 +170,47 @@ typedef enum __attribute__((__packed__)) { floatx80_precision_s, } FloatX80RoundPrec; +/* + * 2-input NaN propagation rule. Individual architectures have + * different rules for which input NaN is propagated to the output + * when there is more than one NaN on the input. + * + * If default_nan_mode is enabled then it is valid not to set a + * NaN propagation rule, because the softfloat code guarantees + * not to try to pick a NaN to propagate in default NaN mode. + * + * For transition, currently the 'none' rule will cause us to + * fall back to picking the propagation rule based on the existing + * ifdef ladder. When all targets are converted it will be an error + * not to set the rule in float_status unless in default_nan_mode, + * and we will assert if we need to handle an input NaN and no + * rule was selected. + */ +typedef enum __attribute__((__packed__)) { + /* No propagation rule specified */ + float_2nan_prop_none = 0, + /* Prefer SNaN over QNaN, then operand A over B */ + float_2nan_prop_s_ab, + /* Prefer SNaN over QNaN, then operand B over A */ + float_2nan_prop_s_ba, + /* Prefer A over B regardless of SNaN vs QNaN */ + float_2nan_prop_ab, + /* Prefer B over A regardless of SNaN vs QNaN */ + float_2nan_prop_ba, + /* + * This implements x87 NaN propagation rules: + * SNaN + QNaN => return the QNaN + * two SNaNs => return the one with the larger significand, silenced + * two QNaNs => return the one with the larger significand + * SNaN and a non-NaN => return the SNaN, silenced + * QNaN and a non-NaN => return the QNaN + * + * If we get down to comparing significands and they are the same, + * return the NaN with the positive sign bit (if any). + */ + float_2nan_prop_x87, +} Float2NaNPropRule; + /* * Floating Point Status. Individual architectures may maintain * several versions of float_status for different functions. The @@ -181,6 +222,7 @@ typedef struct float_status { uint16_t float_exception_flags; FloatRoundMode float_rounding_mode; FloatX80RoundPrec floatx80_rounding_precision; + Float2NaNPropRule float_2nan_prop_rule; bool tininess_before_rounding; /* should denormalised results go to zero and set the inexact flag? */ bool flush_to_zero; From d22c9949d73d714e1ea2caf033131d882ad1d66a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:53 +0000 Subject: [PATCH 02/31] tests/fp: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explicitly set a 2-NaN propagation rule in the softfloat tests. In meson.build we put -DTARGET_ARM in fpcflags, and so we should select here the Arm propagation rule of float_2nan_prop_s_ab. Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-3-peter.maydell@linaro.org --- tests/fp/fp-bench.c | 2 ++ tests/fp/fp-test-log2.c | 1 + tests/fp/fp-test.c | 2 ++ 3 files changed, 5 insertions(+) diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c index 8ce0ca1545..75c07d5d1f 100644 --- a/tests/fp/fp-bench.c +++ b/tests/fp/fp-bench.c @@ -488,6 +488,8 @@ static void run_bench(void) { bench_func_t f; + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &soft_status); + f = bench_funcs[operation][precision]; g_assert(f); f(); diff --git a/tests/fp/fp-test-log2.c b/tests/fp/fp-test-log2.c index 4eae93eb7c..de702c4c80 100644 --- a/tests/fp/fp-test-log2.c +++ b/tests/fp/fp-test-log2.c @@ -70,6 +70,7 @@ int main(int ac, char **av) float_status qsf = {0}; int i; + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf); set_float_rounding_mode(float_round_nearest_even, &qsf); test.d = 0.0; diff --git a/tests/fp/fp-test.c b/tests/fp/fp-test.c index 36b5712cda..5f6f25c882 100644 --- a/tests/fp/fp-test.c +++ b/tests/fp/fp-test.c @@ -935,6 +935,8 @@ void run_test(void) { unsigned int i; + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &qsf); + genCases_setLevel(test_level); verCases_maxErrorCount = n_max_errors; From d1ff996788a41280e2e0213b9571afeca4d6ca90 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:53 +0000 Subject: [PATCH 03/31] target/arm: Explicitly set 2-NaN propagation rule Set the 2-NaN propagation rule explicitly in the float_status words we use. We wrap this plus the pre-existing setting of the tininess-before-rounding flag in a new function arm_set_default_fp_behaviours() to avoid repetition, since we have a lot of float_status words at this point. The situation with FPA11 emulation in linux-user is a little odd, and arguably "correct" behaviour there would be to exactly match a real Linux kernel's FPA11 emulation. However FPA11 emulation is essentially dead at this point and so it seems better to continue with QEMU's current behaviour and leave a comment describing the situation. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-4-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 13 ++----------- linux-user/arm/nwfpe/fpa11.c | 18 ++++++++++++++++++ target/arm/cpu.c | 25 +++++++++++++++++-------- 3 files changed, 37 insertions(+), 19 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index fae6794a15..70cd3628b5 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -402,19 +402,10 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, /* target didn't set the rule: fall back to old ifdef choices */ #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ || defined(TARGET_RISCV) || defined(TARGET_SH4) \ - || defined(TARGET_TRICORE) + || defined(TARGET_TRICORE) || defined(TARGET_ARM) g_assert_not_reached(); -#elif defined(TARGET_ARM) || defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ +#elif defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) - /* - * ARM mandated NaN propagation rules (see FPProcessNaNs()), take - * the first of: - * 1. A if it is signaling - * 2. B if it is signaling - * 3. A (quiet) - * 4. B (quiet) - * A signaling NaN is always quietened before returning it. - */ /* * According to MIPS specifications, if one of the two operands is * a sNaN, a new qNaN has to be generated. This is done in diff --git a/linux-user/arm/nwfpe/fpa11.c b/linux-user/arm/nwfpe/fpa11.c index 9a93610d24..8356beb52c 100644 --- a/linux-user/arm/nwfpe/fpa11.c +++ b/linux-user/arm/nwfpe/fpa11.c @@ -51,6 +51,24 @@ void resetFPA11(void) #ifdef MAINTAIN_FPCR fpa11->fpcr = MASK_RESET; #endif + + /* + * Real FPA11 hardware does not handle NaNs, but always takes an + * exception for them to be software-emulated (ARM7500FE datasheet + * section 10.4). There is no documented architectural requirement + * for NaN propagation rules and it will depend on how the OS + * level software emulation opted to do it. We here use prop_s_ab + * which matches the later VFP hardware choice and how QEMU's + * fpa11 emulation has worked in the past. The real Linux kernel + * does something slightly different: arch/arm/nwfpe/softfloat-specialize + * propagateFloat64NaN() has the curious behaviour that it prefers + * the QNaN over the SNaN, but if both are QNaN it picks A and + * if both are SNaN it picks B. In theory we could add this as + * a NaN propagation rule, but in practice FPA11 emulation is so + * close to totally dead that it's not worth trying to match it at + * this late date. + */ + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &fpa11->fp_status); } void SetRoundingMode(const unsigned int opcode) diff --git a/target/arm/cpu.c b/target/arm/cpu.c index 5b751439bd..6938161b95 100644 --- a/target/arm/cpu.c +++ b/target/arm/cpu.c @@ -168,6 +168,18 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node); } +/* + * Set the float_status behaviour to match the Arm defaults: + * * tininess-before-rounding + * * 2-input NaN propagation prefers SNaN over QNaN, and then + * operand A over operand B (see FPProcessNaNs() pseudocode) + */ +static void arm_set_default_fp_behaviours(float_status *s) +{ + set_float_detect_tininess(float_tininess_before_rounding, s); + set_float_2nan_prop_rule(float_2nan_prop_s_ab, s); +} + static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque) { /* Reset a single ARMCPRegInfo register */ @@ -549,14 +561,11 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type) set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status); set_default_nan_mode(1, &env->vfp.standard_fp_status); set_default_nan_mode(1, &env->vfp.standard_fp_status_f16); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.fp_status); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.standard_fp_status); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.fp_status_f16); - set_float_detect_tininess(float_tininess_before_rounding, - &env->vfp.standard_fp_status_f16); + arm_set_default_fp_behaviours(&env->vfp.fp_status); + arm_set_default_fp_behaviours(&env->vfp.standard_fp_status); + arm_set_default_fp_behaviours(&env->vfp.fp_status_f16); + arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16); + #ifndef CONFIG_USER_ONLY if (kvm_enabled()) { kvm_arm_reset_vcpu(cpu); From 0c587f13397a306f7ad4f8b0b7cb9184488012b5 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:53 +0000 Subject: [PATCH 04/31] target/mips: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the 2-NaN propagation rule explicitly in the float_status words we use. For active_fpu.fp_status, we do this in a new fp_reset() function which mirrors the existing msa_reset() function in doing "first call restore to set the fp status parts that depend on CPU state, then set the fp status parts that are constant". Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Message-id: 20241025141254.2141506-5-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 18 ++---------------- target/mips/cpu.c | 2 +- target/mips/fpu_helper.h | 22 ++++++++++++++++++++++ target/mips/msa.c | 17 +++++++++++++++++ 4 files changed, 42 insertions(+), 17 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 70cd3628b5..c60b999aa3 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -402,24 +402,10 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, /* target didn't set the rule: fall back to old ifdef choices */ #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ || defined(TARGET_RISCV) || defined(TARGET_SH4) \ - || defined(TARGET_TRICORE) || defined(TARGET_ARM) + || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) g_assert_not_reached(); -#elif defined(TARGET_MIPS) || defined(TARGET_HPPA) || \ +#elif defined(TARGET_HPPA) || \ defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) - /* - * According to MIPS specifications, if one of the two operands is - * a sNaN, a new qNaN has to be generated. This is done in - * floatXX_silence_nan(). For qNaN inputs the specifications - * says: "When possible, this QNaN result is one of the operand QNaN - * values." In practice it seems that most implementations choose - * the first operand if both operands are qNaN. In short this gives - * the following rules: - * 1. A if it is signaling - * 2. B if it is signaling - * 3. A (quiet) - * 4. B (quiet) - * A signaling NaN is always silenced before returning it. - */ rule = float_2nan_prop_s_ab; #elif defined(TARGET_PPC) || defined(TARGET_M68K) /* diff --git a/target/mips/cpu.c b/target/mips/cpu.c index 9724e71a5e..d0a43b6d5c 100644 --- a/target/mips/cpu.c +++ b/target/mips/cpu.c @@ -407,9 +407,9 @@ static void mips_cpu_reset_hold(Object *obj, ResetType type) } msa_reset(env); + fp_reset(env); compute_hflags(env); - restore_fp_status(env); restore_pamask(env); cs->exception_index = EXCP_NONE; diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h index ad1116e8c1..7c3c7897b4 100644 --- a/target/mips/fpu_helper.h +++ b/target/mips/fpu_helper.h @@ -44,6 +44,28 @@ static inline void restore_fp_status(CPUMIPSState *env) restore_snan_bit_mode(env); } +static inline void fp_reset(CPUMIPSState *env) +{ + restore_fp_status(env); + + /* + * According to MIPS specifications, if one of the two operands is + * a sNaN, a new qNaN has to be generated. This is done in + * floatXX_silence_nan(). For qNaN inputs the specifications + * says: "When possible, this QNaN result is one of the operand QNaN + * values." In practice it seems that most implementations choose + * the first operand if both operands are qNaN. In short this gives + * the following rules: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always silenced before returning it. + */ + set_float_2nan_prop_rule(float_2nan_prop_s_ab, + &env->active_fpu.fp_status); +} + /* MSA */ enum CPUMIPSMSADataFormat { diff --git a/target/mips/msa.c b/target/mips/msa.c index 61f1a9a593..9dffc428f5 100644 --- a/target/mips/msa.c +++ b/target/mips/msa.c @@ -49,6 +49,23 @@ void msa_reset(CPUMIPSState *env) set_float_detect_tininess(float_tininess_after_rounding, &env->active_tc.msa_fp_status); + /* + * According to MIPS specifications, if one of the two operands is + * a sNaN, a new qNaN has to be generated. This is done in + * floatXX_silence_nan(). For qNaN inputs the specifications + * says: "When possible, this QNaN result is one of the operand QNaN + * values." In practice it seems that most implementations choose + * the first operand if both operands are qNaN. In short this gives + * the following rules: + * 1. A if it is signaling + * 2. B if it is signaling + * 3. A (quiet) + * 4. B (quiet) + * A signaling NaN is always silenced before returning it. + */ + set_float_2nan_prop_rule(float_2nan_prop_s_ab, + &env->active_tc.msa_fp_status); + /* clear float_status exception flags */ set_float_exception_flags(0, &env->active_tc.msa_fp_status); From 1bb5257def132090bdff2cd1352841915553aaaf Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:54 +0000 Subject: [PATCH 05/31] target/loongarch: Explicitly set 2-NaN propagation rule Set the 2-NaN propagation rule explicitly in the float_status word we use. (There are a couple of places in fpu_helper.c where we create a dummy float_status word with "float_status *s = { };", but these are only used for calling float*_is_quiet_nan() so it doesn't matter that we don't set a 2-NaN propagation rule there.) Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-6-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 6 +++--- target/loongarch/tcg/fpu_helper.c | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index c60b999aa3..bbc3b70fa9 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -402,10 +402,10 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, /* target didn't set the rule: fall back to old ifdef choices */ #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ || defined(TARGET_RISCV) || defined(TARGET_SH4) \ - || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) + || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ + || defined(TARGET_LOONGARCH64) g_assert_not_reached(); -#elif defined(TARGET_HPPA) || \ - defined(TARGET_LOONGARCH64) || defined(TARGET_S390X) +#elif defined(TARGET_HPPA) || defined(TARGET_S390X) rule = float_2nan_prop_s_ab; #elif defined(TARGET_PPC) || defined(TARGET_M68K) /* diff --git a/target/loongarch/tcg/fpu_helper.c b/target/loongarch/tcg/fpu_helper.c index f6753c5875..21bc3b04a9 100644 --- a/target/loongarch/tcg/fpu_helper.c +++ b/target/loongarch/tcg/fpu_helper.c @@ -31,6 +31,7 @@ void restore_fp_status(CPULoongArchState *env) set_float_rounding_mode(ieee_rm[(env->fcsr0 >> FCSR0_RM) & 0x3], &env->fp_status); set_flush_to_zero(0, &env->fp_status); + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status); } int ieee_ex_to_loongarch(int xcpt) From 2915876e035088d516d88394028903064072543e Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:54 +0000 Subject: [PATCH 06/31] target/hppa: Explicitly set 2-NaN propagation rule Set the 2-NaN propagation rule explicitly in env->fp_status. Really we only need to do this at CPU reset (after reset has zeroed out most of the CPU state struct, which typically includes fp_status fields). However target/hppa does not currently implement CPU reset at all, so leave a TODO comment to note that this could be moved if we ever do implement reset. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-7-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 4 ++-- target/hppa/fpu_helper.c | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index bbc3b70fa9..4e51cf8d08 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -403,9 +403,9 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ || defined(TARGET_RISCV) || defined(TARGET_SH4) \ || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ - || defined(TARGET_LOONGARCH64) + || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) g_assert_not_reached(); -#elif defined(TARGET_HPPA) || defined(TARGET_S390X) +#elif defined(TARGET_S390X) rule = float_2nan_prop_s_ab; #elif defined(TARGET_PPC) || defined(TARGET_M68K) /* diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c index deaed2b65d..0e44074ba8 100644 --- a/target/hppa/fpu_helper.c +++ b/target/hppa/fpu_helper.c @@ -49,6 +49,12 @@ void HELPER(loaded_fr0)(CPUHPPAState *env) d = FIELD_EX32(shadow, FPSR, D); set_flush_to_zero(d, &env->fp_status); set_flush_inputs_to_zero(d, &env->fp_status); + + /* + * TODO: we only need to do this at CPU reset, but currently + * HPPA does note implement a CPU reset method at all... + */ + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fp_status); } void cpu_hppa_loaded_fr0(CPUHPPAState *env) From 841f9d74751f8615dbc6215504db7abbc0ebff5e Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:54 +0000 Subject: [PATCH 07/31] target/s390x: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the 2-NaN propagation rule explicitly in env->fpu_status. Signed-off-by: Peter Maydell Reviewed-by: Ilya Leoshkevich Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-8-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 5 ++--- target/s390x/cpu.c | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 4e51cf8d08..a0c740e544 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -403,10 +403,9 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, #if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ || defined(TARGET_RISCV) || defined(TARGET_SH4) \ || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ - || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) + || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ + || defined(TARGET_S390X) g_assert_not_reached(); -#elif defined(TARGET_S390X) - rule = float_2nan_prop_s_ab; #elif defined(TARGET_PPC) || defined(TARGET_M68K) /* * PowerPC propagation rules: diff --git a/target/s390x/cpu.c b/target/s390x/cpu.c index 4e41a3dff5..514c70f301 100644 --- a/target/s390x/cpu.c +++ b/target/s390x/cpu.c @@ -205,6 +205,7 @@ static void s390_cpu_reset_hold(Object *obj, ResetType type) /* tininess for underflow is detected before rounding */ set_float_detect_tininess(float_tininess_before_rounding, &env->fpu_status); + set_float_2nan_prop_rule(float_2nan_prop_s_ab, &env->fpu_status); /* fall through */ case RESET_TYPE_S390_CPU_NORMAL: env->psw.mask &= ~PSW_MASK_RI; From 5aaab56a1ab63d7604308be4e746b8804ccff7da Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:54 +0000 Subject: [PATCH 08/31] target/ppc: Explicitly set 2-NaN propagation rule Set the 2-NaN propagation rule explicitly in env->fp_status and env->vec_status. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-9-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 10 ++-------- target/ppc/cpu_init.c | 8 ++++++++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index a0c740e544..8e3124c11a 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -404,15 +404,9 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_RISCV) || defined(TARGET_SH4) \ || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ - || defined(TARGET_S390X) + || defined(TARGET_S390X) || defined(TARGET_PPC) g_assert_not_reached(); -#elif defined(TARGET_PPC) || defined(TARGET_M68K) - /* - * PowerPC propagation rules: - * 1. A if it sNaN or qNaN - * 2. B if it sNaN or qNaN - * A signaling NaN is always silenced before returning it. - */ +#elif defined(TARGET_M68K) /* * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL * 3.4 FLOATING-POINT INSTRUCTION DETAILS diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c index 23881d09e9..5c9dcd1f85 100644 --- a/target/ppc/cpu_init.c +++ b/target/ppc/cpu_init.c @@ -7326,6 +7326,14 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type) /* tininess for underflow is detected before rounding */ set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status); + /* + * PowerPC propagation rules: + * 1. A if it sNaN or qNaN + * 2. B if it sNaN or qNaN + * A signaling NaN is always silenced before returning it. + */ + set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status); + set_float_2nan_prop_rule(float_2nan_prop_ab, &env->vec_status); for (i = 0; i < ARRAY_SIZE(env->spr_cb); i++) { ppc_spr_t *spr = &env->spr_cb[i]; From 0527cfd94c1d648870176b5610aaefb4fc1d7eba Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:54 +0000 Subject: [PATCH 09/31] target/m68k: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explicitly set the 2-NaN propagation rule on env->fp_status and on the temporary fp_status that we use in frem (since we pass that to a division operation function). Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Reviewed-by: Philippe Mathieu-Daudé --- fpu/softfloat-specialize.c.inc | 19 +------------------ target/m68k/cpu.c | 16 ++++++++++++++++ target/m68k/fpu_helper.c | 1 + 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 8e3124c11a..226632a4d1 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -404,25 +404,8 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_RISCV) || defined(TARGET_SH4) \ || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ - || defined(TARGET_S390X) || defined(TARGET_PPC) + || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) g_assert_not_reached(); -#elif defined(TARGET_M68K) - /* - * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL - * 3.4 FLOATING-POINT INSTRUCTION DETAILS - * If either operand, but not both operands, of an operation is a - * nonsignaling NaN, then that NaN is returned as the result. If both - * operands are nonsignaling NaNs, then the destination operand - * nonsignaling NaN is returned as the result. - * If either operand to an operation is a signaling NaN (SNaN), then the - * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit - * is set in the FPCR ENABLE byte, then the exception is taken and the - * destination is not modified. If the SNaN exception enable bit is not - * set, setting the SNaN bit in the operand to a one converts the SNaN to - * a nonsignaling NaN. The operation then continues as described in the - * preceding paragraph for nonsignaling NaNs. - */ - rule = float_2nan_prop_ab; #elif defined(TARGET_SPARC) /* Prefer SNaN over QNaN, order B then A. */ rule = float_2nan_prop_s_ba; diff --git a/target/m68k/cpu.c b/target/m68k/cpu.c index 1d49f4cb23..5fe335558a 100644 --- a/target/m68k/cpu.c +++ b/target/m68k/cpu.c @@ -93,6 +93,22 @@ static void m68k_cpu_reset_hold(Object *obj, ResetType type) env->fregs[i].d = nan; } cpu_m68k_set_fpcr(env, 0); + /* + * M68000 FAMILY PROGRAMMER'S REFERENCE MANUAL + * 3.4 FLOATING-POINT INSTRUCTION DETAILS + * If either operand, but not both operands, of an operation is a + * nonsignaling NaN, then that NaN is returned as the result. If both + * operands are nonsignaling NaNs, then the destination operand + * nonsignaling NaN is returned as the result. + * If either operand to an operation is a signaling NaN (SNaN), then the + * SNaN bit is set in the FPSR EXC byte. If the SNaN exception enable bit + * is set in the FPCR ENABLE byte, then the exception is taken and the + * destination is not modified. If the SNaN exception enable bit is not + * set, setting the SNaN bit in the operand to a one converts the SNaN to + * a nonsignaling NaN. The operation then continues as described in the + * preceding paragraph for nonsignaling NaNs. + */ + set_float_2nan_prop_rule(float_2nan_prop_ab, &env->fp_status); env->fpsr = 0; /* TODO: We should set PC from the interrupt vector. */ diff --git a/target/m68k/fpu_helper.c b/target/m68k/fpu_helper.c index 8314791f50..a605162b71 100644 --- a/target/m68k/fpu_helper.c +++ b/target/m68k/fpu_helper.c @@ -620,6 +620,7 @@ void HELPER(frem)(CPUM68KState *env, FPReg *res, FPReg *val0, FPReg *val1) int sign; /* Calculate quotient directly using round to nearest mode */ + set_float_2nan_prop_rule(float_2nan_prop_ab, &fp_status); set_float_rounding_mode(float_round_nearest_even, &fp_status); set_floatx80_rounding_precision( get_floatx80_rounding_precision(&env->fp_status), &fp_status); From ad58ba13d04b85c141bca2d927fb9de6490bf0c1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:54 +0000 Subject: [PATCH 10/31] target/m68k: Initialize float_status fields in gdb set/get functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In cf_fpu_gdb_get_reg() and cf_fpu_gdb_set_reg() we use a temporary float_status variable to pass to floatx80_to_float64() and float64_to_floatx80(), but we don't initialize it, meaning that those functions could access uninitialized data. Zero-init the structs. (We don't need to set a NaN-propagation rule here because we don't use these with a 2-argument fpu operation.) Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-11-peter.maydell@linaro.org --- target/m68k/helper.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/m68k/helper.c b/target/m68k/helper.c index 9d3db8419d..9bfc6ae97c 100644 --- a/target/m68k/helper.c +++ b/target/m68k/helper.c @@ -36,7 +36,7 @@ static int cf_fpu_gdb_get_reg(CPUState *cs, GByteArray *mem_buf, int n) CPUM68KState *env = &cpu->env; if (n < 8) { - float_status s; + float_status s = {}; return gdb_get_reg64(mem_buf, floatx80_to_float64(env->fregs[n].d, &s)); } switch (n) { @@ -56,7 +56,7 @@ static int cf_fpu_gdb_set_reg(CPUState *cs, uint8_t *mem_buf, int n) CPUM68KState *env = &cpu->env; if (n < 8) { - float_status s; + float_status s = {}; env->fregs[n].d = float64_to_floatx80(ldq_be_p(mem_buf), &s); return 8; } From 65c1c039cdbcf66e9bc5b0366c8f7cc22284359a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:55 +0000 Subject: [PATCH 11/31] target/sparc: Move cpu_put_fsr(env, 0) call to reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we call cpu_put_fsr(0) in sparc_cpu_realizefn(), which initializes various fields in the CPU struct: * fsr_cexc_ftt * fcc[] * fsr_qne * fsr It also sets the rounding mode in env->fp_status. This is largely pointless, because when we later reset the CPU this will zero out all the fields up until the "end_reset_fields" label, which includes all of these (but not fp_status!) Move the cpu_put_fsr(env, 0) call to reset, because that expresses the logical requirement: we want to reset FSR to 0 on every reset. This isn't a behaviour change because the fields are all zero anyway. Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Mark Cave-Ayland Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-12-peter.maydell@linaro.org --- target/sparc/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index 54cb269e0a..e7f4068a16 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -76,6 +76,7 @@ static void sparc_cpu_reset_hold(Object *obj, ResetType type) env->npc = env->pc + 4; #endif env->cache_control = 0; + cpu_put_fsr(env, 0); } #ifndef CONFIG_USER_ONLY @@ -805,7 +806,6 @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp) env->version |= env->def.maxtl << 8; env->version |= env->def.nwindows - 1; #endif - cpu_put_fsr(env, 0); cpu_exec_realizefn(cs, &local_err); if (local_err != NULL) { From 4482f32dcd1faa035e03ded557fa980e8b528c31 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:55 +0000 Subject: [PATCH 12/31] target/sparc: Explicitly set 2-NaN propagation rule Set the NaN propagation rule explicitly in the float_status words we use. Signed-off-by: Peter Maydell Acked-by: Mark Cave-Ayland Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-13-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 6 ++---- target/sparc/cpu.c | 8 ++++++++ target/sparc/fop_helper.c | 10 ++++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 226632a4d1..8bc9518717 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -404,11 +404,9 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_RISCV) || defined(TARGET_SH4) \ || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ - || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) + || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ + || defined(TARGET_SPARC) g_assert_not_reached(); -#elif defined(TARGET_SPARC) - /* Prefer SNaN over QNaN, order B then A. */ - rule = float_2nan_prop_s_ba; #elif defined(TARGET_XTENSA) /* * Xtensa has two NaN propagation modes. diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c index e7f4068a16..dd7af86de7 100644 --- a/target/sparc/cpu.c +++ b/target/sparc/cpu.c @@ -26,6 +26,7 @@ #include "hw/qdev-properties.h" #include "qapi/visitor.h" #include "tcg/tcg.h" +#include "fpu/softfloat.h" //#define DEBUG_FEATURES @@ -807,6 +808,13 @@ static void sparc_cpu_realizefn(DeviceState *dev, Error **errp) env->version |= env->def.nwindows - 1; #endif + /* + * Prefer SNaN over QNaN, order B then A. It's OK to do this in realize + * rather than reset, because fp_status is after 'end_reset_fields' in + * the CPU state struct so it won't get zeroed on reset. + */ + set_float_2nan_prop_rule(float_2nan_prop_s_ba, &env->fp_status); + cpu_exec_realizefn(cs, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c index b6692382b3..6f9ccc008a 100644 --- a/target/sparc/fop_helper.c +++ b/target/sparc/fop_helper.c @@ -497,7 +497,10 @@ uint32_t helper_flcmps(float32 src1, float32 src2) * Perform the comparison with a dummy fp environment. */ float_status discard = { }; - FloatRelation r = float32_compare_quiet(src1, src2, &discard); + FloatRelation r; + + set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard); + r = float32_compare_quiet(src1, src2, &discard); switch (r) { case float_relation_equal: @@ -518,7 +521,10 @@ uint32_t helper_flcmps(float32 src1, float32 src2) uint32_t helper_flcmpd(float64 src1, float64 src2) { float_status discard = { }; - FloatRelation r = float64_compare_quiet(src1, src2, &discard); + FloatRelation r; + + set_float_2nan_prop_rule(float_2nan_prop_s_ba, &discard); + r = float64_compare_quiet(src1, src2, &discard); switch (r) { case float_relation_equal: From 80de5f24e09cd9885a5e8d72cc01c097c23bf227 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:55 +0000 Subject: [PATCH 13/31] target/xtensa: Factor out calls to set_use_first_nan() In xtensa we currently call set_use_first_nan() in a lot of places where we want to switch the NaN-propagation handling. We're about to change the softfloat API we use to do that, so start by factoring all the calls out into a single xtensa_use_first_nan() function. The bulk of this change was done with sed -i -e 's/set_use_first_nan(\([^,]*\),[^)]*)/xtensa_use_first_nan(env, \1)/' target/xtensa/fpu_helper.c Signed-off-by: Peter Maydell Reviewed-by: Max Filippov Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-14-peter.maydell@linaro.org --- target/xtensa/cpu.c | 2 +- target/xtensa/cpu.h | 6 ++++++ target/xtensa/fpu_helper.c | 33 +++++++++++++++++++-------------- 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/target/xtensa/cpu.c b/target/xtensa/cpu.c index a08c7a0b1f..6f9039abae 100644 --- a/target/xtensa/cpu.c +++ b/target/xtensa/cpu.c @@ -134,7 +134,7 @@ static void xtensa_cpu_reset_hold(Object *obj, ResetType type) cs->halted = env->runstall; #endif set_no_signaling_nans(!dfpu, &env->fp_status); - set_use_first_nan(!dfpu, &env->fp_status); + xtensa_use_first_nan(env, !dfpu); } static ObjectClass *xtensa_cpu_class_by_name(const char *cpu_model) diff --git a/target/xtensa/cpu.h b/target/xtensa/cpu.h index 9f2341d856..77e48eef19 100644 --- a/target/xtensa/cpu.h +++ b/target/xtensa/cpu.h @@ -802,4 +802,10 @@ static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, vaddr *pc, XtensaCPU *xtensa_cpu_create_with_clock(const char *cpu_type, Clock *cpu_refclk); +/* + * Set the NaN propagation rule for future FPU operations: + * use_first is true to pick the first NaN as the result if both + * inputs are NaNs, false to pick the second. + */ +void xtensa_use_first_nan(CPUXtensaState *env, bool use_first); #endif diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c index 381e83ded8..50a5efa65e 100644 --- a/target/xtensa/fpu_helper.c +++ b/target/xtensa/fpu_helper.c @@ -57,6 +57,11 @@ static const struct { { XTENSA_FP_V, float_flag_invalid, }, }; +void xtensa_use_first_nan(CPUXtensaState *env, bool use_first) +{ + set_use_first_nan(use_first, &env->fp_status); +} + void HELPER(wur_fpu2k_fcr)(CPUXtensaState *env, uint32_t v) { static const int rounding_mode[] = { @@ -171,87 +176,87 @@ float32 HELPER(fpu2k_msub_s)(CPUXtensaState *env, float64 HELPER(add_d)(CPUXtensaState *env, float64 a, float64 b) { - set_use_first_nan(true, &env->fp_status); + xtensa_use_first_nan(env, true); return float64_add(a, b, &env->fp_status); } float32 HELPER(add_s)(CPUXtensaState *env, float32 a, float32 b) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float32_add(a, b, &env->fp_status); } float64 HELPER(sub_d)(CPUXtensaState *env, float64 a, float64 b) { - set_use_first_nan(true, &env->fp_status); + xtensa_use_first_nan(env, true); return float64_sub(a, b, &env->fp_status); } float32 HELPER(sub_s)(CPUXtensaState *env, float32 a, float32 b) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float32_sub(a, b, &env->fp_status); } float64 HELPER(mul_d)(CPUXtensaState *env, float64 a, float64 b) { - set_use_first_nan(true, &env->fp_status); + xtensa_use_first_nan(env, true); return float64_mul(a, b, &env->fp_status); } float32 HELPER(mul_s)(CPUXtensaState *env, float32 a, float32 b) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float32_mul(a, b, &env->fp_status); } float64 HELPER(madd_d)(CPUXtensaState *env, float64 a, float64 b, float64 c) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float64_muladd(b, c, a, 0, &env->fp_status); } float32 HELPER(madd_s)(CPUXtensaState *env, float32 a, float32 b, float32 c) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float32_muladd(b, c, a, 0, &env->fp_status); } float64 HELPER(msub_d)(CPUXtensaState *env, float64 a, float64 b, float64 c) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float64_muladd(b, c, a, float_muladd_negate_product, &env->fp_status); } float32 HELPER(msub_s)(CPUXtensaState *env, float32 a, float32 b, float32 c) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float32_muladd(b, c, a, float_muladd_negate_product, &env->fp_status); } float64 HELPER(mkdadj_d)(CPUXtensaState *env, float64 a, float64 b) { - set_use_first_nan(true, &env->fp_status); + xtensa_use_first_nan(env, true); return float64_div(b, a, &env->fp_status); } float32 HELPER(mkdadj_s)(CPUXtensaState *env, float32 a, float32 b) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float32_div(b, a, &env->fp_status); } float64 HELPER(mksadj_d)(CPUXtensaState *env, float64 v) { - set_use_first_nan(true, &env->fp_status); + xtensa_use_first_nan(env, true); return float64_sqrt(v, &env->fp_status); } float32 HELPER(mksadj_s)(CPUXtensaState *env, float32 v) { - set_use_first_nan(env->config->use_first_nan, &env->fp_status); + xtensa_use_first_nan(env, env->config->use_first_nan); return float32_sqrt(v, &env->fp_status); } From 8d988eb44cd2723a6bd31a2895bdaba48524b6e0 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:55 +0000 Subject: [PATCH 14/31] target/xtensa: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the NaN propagation rule explicitly in xtensa_use_first_nan(). (When we convert the softfloat pickNaNMulAdd routine to also select a NaN propagation rule at runtime, we will be able to remove the use_first_nan flag because the propagation rules will handle everything.) Signed-off-by: Peter Maydell Reviewed-by: Max Filippov Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-15-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 12 +----------- target/xtensa/fpu_helper.c | 2 ++ 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 8bc9518717..b050c5eb04 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -405,18 +405,8 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ - || defined(TARGET_SPARC) + || defined(TARGET_SPARC) || defined(TARGET_XTENSA) g_assert_not_reached(); -#elif defined(TARGET_XTENSA) - /* - * Xtensa has two NaN propagation modes. - * Which one is active is controlled by float_status::use_first_nan. - */ - if (status->use_first_nan) { - rule = float_2nan_prop_ab; - } else { - rule = float_2nan_prop_ba; - } #else rule = float_2nan_prop_x87; #endif diff --git a/target/xtensa/fpu_helper.c b/target/xtensa/fpu_helper.c index 50a5efa65e..f2d212d05d 100644 --- a/target/xtensa/fpu_helper.c +++ b/target/xtensa/fpu_helper.c @@ -60,6 +60,8 @@ static const struct { void xtensa_use_first_nan(CPUXtensaState *env, bool use_first) { set_use_first_nan(use_first, &env->fp_status); + set_float_2nan_prop_rule(use_first ? float_2nan_prop_ab : float_2nan_prop_ba, + &env->fp_status); } void HELPER(wur_fpu2k_fcr)(CPUXtensaState *env, uint32_t v) From 62d39b28ef4e7ad41a3ef3e37037615de6dfc194 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:56 +0000 Subject: [PATCH 15/31] target/i386: Set 2-NaN propagation rule explicitly Set the NaN propagation rule explicitly for the float_status words used in the x86 target. This is a no-behaviour-change commit, so we retain the existing behaviour of using the x87-style "prefer QNaN over SNaN, then prefer the NaN with the larger significand" for MMX and SSE. This is however not the documented hardware behaviour, so we leave a TODO note about what we should be doing instead. Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-16-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 3 ++- target/i386/cpu.c | 4 ++++ target/i386/cpu.h | 3 +++ target/i386/tcg/fpu_helper.c | 40 ++++++++++++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index b050c5eb04..77ebc8216f 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -405,7 +405,8 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ - || defined(TARGET_SPARC) || defined(TARGET_XTENSA) + || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ + || defined(TARGET_I386) g_assert_not_reached(); #else rule = float_2nan_prop_x87; diff --git a/target/i386/cpu.c b/target/i386/cpu.c index 3baa95481f..3d2874cf78 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -7200,6 +7200,10 @@ static void x86_cpu_reset_hold(Object *obj, ResetType type) memset(env, 0, offsetof(CPUX86State, end_reset_fields)); + if (tcg_enabled()) { + cpu_init_fp_statuses(env); + } + env->old_exception = -1; /* init to reset state */ diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 59959b8b7a..c24d81bf31 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -2614,6 +2614,9 @@ static inline bool cpu_vmx_maybe_enabled(CPUX86State *env) int get_pg_mode(CPUX86State *env); /* fpu_helper.c */ + +/* Set all non-runtime-variable float_status fields to x86 handling */ +void cpu_init_fp_statuses(CPUX86State *env); void update_fp_status(CPUX86State *env); void update_mxcsr_status(CPUX86State *env); void update_mxcsr_from_sse_status(CPUX86State *env); diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index e1b850f3fc..53b49bb297 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -135,6 +135,46 @@ static void fpu_set_exception(CPUX86State *env, int mask) } } +void cpu_init_fp_statuses(CPUX86State *env) +{ + /* + * Initialise the non-runtime-varying fields of the various + * float_status words to x86 behaviour. This must be called at + * CPU reset because the float_status words are in the + * "zeroed on reset" portion of the CPU state struct. + * Fields in float_status that vary under guest control are set + * via the codepath for setting that register, eg cpu_set_fpuc(). + */ + /* + * Use x87 NaN propagation rules: + * SNaN + QNaN => return the QNaN + * two SNaNs => return the one with the larger significand, silenced + * two QNaNs => return the one with the larger significand + * SNaN and a non-NaN => return the SNaN, silenced + * QNaN and a non-NaN => return the QNaN + * + * If we get down to comparing significands and they are the same, + * return the NaN with the positive sign bit (if any). + */ + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); + /* + * TODO: These are incorrect: the x86 Software Developer's Manual vol 1 + * section 4.8.3.5 "Operating on SNaNs and QNaNs" says that the + * "larger significand" behaviour is only used for x87 FPU operations. + * For SSE the required behaviour is to always return the first NaN, + * which is float_2nan_prop_ab. + * + * mmx_status is used only for the AMD 3DNow! instructions, which + * are documented in the "3DNow! Technology Manual" as not supporting + * NaNs or infinities as inputs. The result of passing two NaNs is + * documented as "undefined", so we can do what we choose. + * (Strictly there is some behaviour we don't implement correctly + * for these "unsupported" NaN and Inf values, like "NaN * 0 == 0".) + */ + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->mmx_status); + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->sse_status); +} + static inline uint8_t save_exception_flags(CPUX86State *env) { uint8_t old_flags = get_float_exception_flags(&env->fp_status); From 8403a5015c5d5ef01792dc5ebf3c10aa5594931f Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:56 +0000 Subject: [PATCH 16/31] target/alpha: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the NaN propagation rule explicitly for the float_status word used in this target. This is a no-behaviour-change commit, so we retain the existing behaviour of x87-style pick-largest-significand NaN propagation. This is however not the architecturally correct handling, so we leave a TODO note to that effect. We also leave a TODO note pointing out that all this code in the cpu initfn (including the existing setting up of env->flags and the FPCR) should be in a currently non-existent CPU reset function. Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-17-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 2 +- target/alpha/cpu.c | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 77ebc8216f..a5c3e2b8de 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -406,7 +406,7 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ - || defined(TARGET_I386) + || defined(TARGET_I386) || defined(TARGET_ALPHA) g_assert_not_reached(); #else rule = float_2nan_prop_x87; diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c index 9db1dffc03..5d75c941f7 100644 --- a/target/alpha/cpu.c +++ b/target/alpha/cpu.c @@ -24,6 +24,7 @@ #include "qemu/qemu-print.h" #include "cpu.h" #include "exec/exec-all.h" +#include "fpu/softfloat.h" static void alpha_cpu_set_pc(CPUState *cs, vaddr value) @@ -187,7 +188,17 @@ static void alpha_cpu_initfn(Object *obj) { CPUAlphaState *env = cpu_env(CPU(obj)); + /* TODO all this should be done in reset, not init */ + env->lock_addr = -1; + + /* + * TODO: this is incorrect. The Alpha Architecture Handbook version 4 + * describes NaN propagation in section 4.7.10.4. We should prefer + * the operand in Fb (whether it is a QNaN or an SNaN), then the + * operand in Fa. That is float_2nan_prop_ba. + */ + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); #if defined(CONFIG_USER_ONLY) env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN; cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD From c18a13edceca5419beb6eb703ffad6980d926f3b Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:56 +0000 Subject: [PATCH 17/31] target/microblaze: Move setting of float rounding mode to reset Although the floating point rounding mode for Microblaze is always nearest-even, we cannot set it just once in the CPU initfn. This is because env->fp_status is in the part of the CPU state struct that is zeroed on reset. Move the call to set_float_rounding_mode() into the reset fn. (This had no guest-visible effects because it happens that the float_round_nearest_even enum value is 0, so when the struct was zeroed it didn't corrupt the setting.) Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-18-peter.maydell@linaro.org --- target/microblaze/cpu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 135947ee80..6329a77433 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -201,6 +201,8 @@ static void mb_cpu_reset_hold(Object *obj, ResetType type) env->pc = cpu->cfg.base_vectors; + set_float_rounding_mode(float_round_nearest_even, &env->fp_status); + #if defined(CONFIG_USER_ONLY) /* start in user mode with interrupts enabled. */ mb_cpu_write_msr(env, MSR_EE | MSR_IE | MSR_VM | MSR_UM); @@ -311,15 +313,12 @@ static void mb_cpu_realizefn(DeviceState *dev, Error **errp) static void mb_cpu_initfn(Object *obj) { MicroBlazeCPU *cpu = MICROBLAZE_CPU(obj); - CPUMBState *env = &cpu->env; gdb_register_coprocessor(CPU(cpu), mb_cpu_gdb_read_stack_protect, mb_cpu_gdb_write_stack_protect, gdb_find_static_feature("microblaze-stack-protect.xml"), 0); - set_float_rounding_mode(float_round_nearest_even, &env->fp_status); - #ifndef CONFIG_USER_ONLY /* Inbound IRQ and FIR lines */ qdev_init_gpio_in(DEVICE(cpu), microblaze_cpu_set_irq, 2); From 4fafdcc833f2bfe0037512921dc37ebeed053853 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:57 +0000 Subject: [PATCH 18/31] target/microblaze: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the NaN propagation rule explicitly for the float_status word used in the microblaze target. This is probably not the architecturally correct behaviour, but since this is a no-behaviour-change patch, we leave a TODO note to that effect. Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-19-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 3 ++- target/microblaze/cpu.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index a5c3e2b8de..40cbb1ab73 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -406,7 +406,8 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ - || defined(TARGET_I386) || defined(TARGET_ALPHA) + || defined(TARGET_I386) || defined(TARGET_ALPHA) \ + || defined(TARGET_MICROBLAZE) g_assert_not_reached(); #else rule = float_2nan_prop_x87; diff --git a/target/microblaze/cpu.c b/target/microblaze/cpu.c index 6329a77433..14286deead 100644 --- a/target/microblaze/cpu.c +++ b/target/microblaze/cpu.c @@ -202,6 +202,11 @@ static void mb_cpu_reset_hold(Object *obj, ResetType type) env->pc = cpu->cfg.base_vectors; set_float_rounding_mode(float_round_nearest_even, &env->fp_status); + /* + * TODO: this is probably not the correct NaN propagation rule for + * this architecture. + */ + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); #if defined(CONFIG_USER_ONLY) /* start in user mode with interrupts enabled. */ From 355e6cfb94f61214ad4f633ce568debec5a8fc0d Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:57 +0000 Subject: [PATCH 19/31] target/openrisc: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the NaN propagation rule explicitly for the float_status word used in the openrisc target. Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-20-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 2 +- target/openrisc/cpu.c | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 40cbb1ab73..ee5c73cad4 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -407,7 +407,7 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ || defined(TARGET_I386) || defined(TARGET_ALPHA) \ - || defined(TARGET_MICROBLAZE) + || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) g_assert_not_reached(); #else rule = float_2nan_prop_x87; diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c index 6ec54ad7a6..b96561d1f2 100644 --- a/target/openrisc/cpu.c +++ b/target/openrisc/cpu.c @@ -105,6 +105,12 @@ static void openrisc_cpu_reset_hold(Object *obj, ResetType type) set_float_detect_tininess(float_tininess_before_rounding, &cpu->env.fp_status); + /* + * TODO: this is probably not the correct NaN propagation rule for + * this architecture. + */ + set_float_2nan_prop_rule(float_2nan_prop_x87, &cpu->env.fp_status); + #ifndef CONFIG_USER_ONLY cpu->env.picmr = 0x00000000; From ba6558461cb0280ad861b376cbfff4680be82570 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:57 +0000 Subject: [PATCH 20/31] target/rx: Explicitly set 2-NaN propagation rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set the NaN propagation rule explicitly for the float_status word used in the rx target. This not the architecturally correct behaviour, but since this is a no-behaviour-change patch, we leave a TODO note to that effect. Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-21-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 3 ++- target/rx/cpu.c | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index ee5c73cad4..254bbd6716 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -407,7 +407,8 @@ static int pickNaN(FloatClass a_cls, FloatClass b_cls, || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ || defined(TARGET_I386) || defined(TARGET_ALPHA) \ - || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) + || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) \ + || defined(TARGET_RX) g_assert_not_reached(); #else rule = float_2nan_prop_x87; diff --git a/target/rx/cpu.c b/target/rx/cpu.c index 36d2a6f189..65a74ce720 100644 --- a/target/rx/cpu.c +++ b/target/rx/cpu.c @@ -93,6 +93,13 @@ static void rx_cpu_reset_hold(Object *obj, ResetType type) env->fpsw = 0; set_flush_to_zero(1, &env->fp_status); set_flush_inputs_to_zero(1, &env->fp_status); + /* + * TODO: this is not the correct NaN propagation rule for this + * architecture. The "RX Family User's Manual: Software" table 1.6 + * defines the propagation rules as "prefer SNaN over QNaN; + * then prefer dest over source", which is float_2nan_prop_s_ab. + */ + set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status); } static ObjectClass *rx_cpu_class_by_name(const char *cpu_model) From bc0b360def4d4735c3f30e0a35ae9f49209df37a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:58 +0000 Subject: [PATCH 21/31] softfloat: Remove fallback rule from pickNaN() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that all targets have been converted to explicitly set a NaN propagation rule, we can remove the set of target ifdefs (which now list every target) and clean up the references to fallback behaviour for float_2nan_prop_none. The "default" case in the switch will catch any remaining places where status->float_2nan_prop_rule was not set by the target. Signed-off-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-id: 20241025141254.2141506-22-peter.maydell@linaro.org --- fpu/softfloat-specialize.c.inc | 23 +++-------------------- include/fpu/softfloat-types.h | 10 +++------- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc index 254bbd6716..b5a3208050 100644 --- a/fpu/softfloat-specialize.c.inc +++ b/fpu/softfloat-specialize.c.inc @@ -390,32 +390,15 @@ bool float32_is_signaling_nan(float32 a_, float_status *status) static int pickNaN(FloatClass a_cls, FloatClass b_cls, bool aIsLargerSignificand, float_status *status) { - Float2NaNPropRule rule = status->float_2nan_prop_rule; - /* * We guarantee not to require the target to tell us how to * pick a NaN if we're always returning the default NaN. + * But if we're not in default-NaN mode then the target must + * specify via set_float_2nan_prop_rule(). */ assert(!status->default_nan_mode); - if (rule == float_2nan_prop_none) { - /* target didn't set the rule: fall back to old ifdef choices */ -#if defined(TARGET_AVR) || defined(TARGET_HEXAGON) \ - || defined(TARGET_RISCV) || defined(TARGET_SH4) \ - || defined(TARGET_TRICORE) || defined(TARGET_ARM) || defined(TARGET_MIPS) \ - || defined(TARGET_LOONGARCH64) || defined(TARGET_HPPA) \ - || defined(TARGET_S390X) || defined(TARGET_PPC) || defined(TARGET_M68K) \ - || defined(TARGET_SPARC) || defined(TARGET_XTENSA) \ - || defined(TARGET_I386) || defined(TARGET_ALPHA) \ - || defined(TARGET_MICROBLAZE) || defined(TARGET_OPENRISC) \ - || defined(TARGET_RX) - g_assert_not_reached(); -#else - rule = float_2nan_prop_x87; -#endif - } - - switch (rule) { + switch (status->float_2nan_prop_rule) { case float_2nan_prop_s_ab: if (is_snan(a_cls)) { return 0; diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h index 5cd5a0d0ae..8f39691dfd 100644 --- a/include/fpu/softfloat-types.h +++ b/include/fpu/softfloat-types.h @@ -178,13 +178,9 @@ typedef enum __attribute__((__packed__)) { * If default_nan_mode is enabled then it is valid not to set a * NaN propagation rule, because the softfloat code guarantees * not to try to pick a NaN to propagate in default NaN mode. - * - * For transition, currently the 'none' rule will cause us to - * fall back to picking the propagation rule based on the existing - * ifdef ladder. When all targets are converted it will be an error - * not to set the rule in float_status unless in default_nan_mode, - * and we will assert if we need to handle an input NaN and no - * rule was selected. + * When not in default-NaN mode, it is an error for the target + * not to set the rule in float_status, and we will assert if + * we need to handle an input NaN and no rule was selected. */ typedef enum __attribute__((__packed__)) { /* No propagation rule specified */ From 056c5c90c171c4895b407af0cf3d198e1d44b40f Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:58 +0000 Subject: [PATCH 22/31] Revert "target/arm: Fix usage of MMU indexes when EL3 is AArch32" This reverts commit 4c2c0474693229c1f533239bb983495c5427784d. This commit tried to fix a problem with our usage of MMU indexes when EL3 is AArch32, using what it described as a "more complicated approach" where we share the same MMU index values for Secure PL1&0 and NonSecure PL1&0. In theory this should work, but the change didn't account for (at least) two things: (1) The design change means we need to flush the TLBs at any point where the CPU state flips from one to the other. We already flush the TLB when SCR.NS is changed, but we don't flush the TLB when we take an exception from NS PL1&0 into Mon or when we return from Mon to NS PL1&0, and the commit didn't add any code to do that. (2) The ATS12NS* address translate instructions allow Mon code (which is Secure) to do a stage 1+2 page table walk for NS. I thought this was OK because do_ats_write() does a page table walk which doesn't use the TLBs, so because it can pass both the MMU index and also an ARMSecuritySpace argument we can tell the table walk that we want NS stage1+2, not S. But that means that all the code within the ptw that needs to find e.g. the regime EL cannot do so only with an mmu_idx -- all these functions like regime_sctlr(), regime_el(), etc would need to pass both an mmu_idx and the security_space, so they can tell whether this is a translation regime controlled by EL1 or EL3 (and so whether to look at SCTLR.S or SCTLR.NS, etc). In particular, because regime_el() wasn't updated to look at the ARMSecuritySpace it would return 1 even when the CPU was in Monitor mode (and the controlling EL is 3). This meant that page table walks in Monitor mode would look at the wrong SCTLR, TCR, etc and would generally fault when they should not. Rather than trying to make the complicated changes needed to rescue the design of 4c2c04746932, we revert it in order to instead take the route that that commit describes as "the most straightforward" fix, where we add new MMU indexes EL30_0, EL30_3, EL30_3_PAN to correspond to "Secure PL1&0 at PL0", "Secure PL1&0 at PL1", and "Secure PL1&0 at PL1 with PAN". This revert will re-expose the "spurious alignment faults in Secure PL0" issue #2326; we'll fix it again in the next commit. Cc: qemu-stable@nongnu.org Signed-off-by: Peter Maydell Tested-by: Thomas Huth Message-id: 20241101142845.1712482-2-peter.maydell@linaro.org Reviewed-by: Richard Henderson --- target/arm/cpu.h | 31 +++++++++++++------------------ target/arm/helper.c | 34 +++++++++++----------------------- target/arm/internals.h | 27 ++++----------------------- target/arm/ptw.c | 6 +----- target/arm/tcg/hflags.c | 4 ---- target/arm/tcg/translate-a64.c | 2 +- target/arm/tcg/translate.c | 9 ++++----- target/arm/tcg/translate.h | 2 -- 8 files changed, 34 insertions(+), 81 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 8fc8b6398f..133a87e39a 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -2787,7 +2787,8 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * + NonSecure PL1 & 0 stage 1 * + NonSecure PL1 & 0 stage 2 * + NonSecure PL2 - * + Secure PL1 & 0 + * + Secure PL0 + * + Secure PL1 * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) * * For QEMU, an mmu_idx is not quite the same as a translation regime because: @@ -2805,39 +2806,37 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * The only use of stage 2 translations is either as part of an s1+2 * lookup or when loading the descriptors during a stage 1 page table walk, * and in both those cases we don't use the TLB. - * 4. we want to be able to use the TLB for accesses done as part of a + * 4. we can also safely fold together the "32 bit EL3" and "64 bit EL3" + * translation regimes, because they map reasonably well to each other + * and they can't both be active at the same time. + * 5. we want to be able to use the TLB for accesses done as part of a * stage1 page table walk, rather than having to walk the stage2 page * table over and over. - * 5. we need separate EL1/EL2 mmu_idx for handling the Privileged Access + * 6. we need separate EL1/EL2 mmu_idx for handling the Privileged Access * Never (PAN) bit within PSTATE. - * 6. we fold together most secure and non-secure regimes for A-profile, + * 7. we fold together most secure and non-secure regimes for A-profile, * because there are no banked system registers for aarch64, so the * process of switching between secure and non-secure is * already heavyweight. - * 7. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, + * 8. we cannot fold together Stage 2 Secure and Stage 2 NonSecure, * because both are in use simultaneously for Secure EL2. * * This gives us the following list of cases: * - * EL0 EL1&0 stage 1+2 (or AArch32 PL0 PL1&0 stage 1+2) - * EL1 EL1&0 stage 1+2 (or AArch32 PL1 PL1&0 stage 1+2) - * EL1 EL1&0 stage 1+2 +PAN (or AArch32 PL1 PL1&0 stage 1+2 +PAN) + * EL0 EL1&0 stage 1+2 (aka NS PL0) + * EL1 EL1&0 stage 1+2 (aka NS PL1) + * EL1 EL1&0 stage 1+2 +PAN * EL0 EL2&0 * EL2 EL2&0 * EL2 EL2&0 +PAN * EL2 (aka NS PL2) - * EL3 (not used when EL3 is AArch32) + * EL3 (aka S PL1) * Stage2 Secure * Stage2 NonSecure * plus one TLB per Physical address space: S, NS, Realm, Root * * for a total of 14 different mmu_idx. * - * Note that when EL3 is AArch32, the usage is potentially confusing - * because the MMU indexes are named for their AArch64 use, so code - * using the ARMMMUIdx_E10_1 might be at EL3, not EL1. This is because - * Secure PL1 is always at EL3. - * * R profile CPUs have an MPU, but can use the same set of MMU indexes * as A profile. They only need to distinguish EL0 and EL1 (and * EL2 for cores like the Cortex-R52). @@ -3130,10 +3129,6 @@ FIELD(TBFLAG_A32, NS, 10, 1) * This requires an SME trap from AArch32 mode when using NEON. */ FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1) -/* - * Indicates whether we are in the Secure PL1&0 translation regime - */ -FIELD(TBFLAG_A32, S_PL1_0, 12, 1) /* * Bit usage when in AArch32 state, for M-profile only. diff --git a/target/arm/helper.c b/target/arm/helper.c index 0a731a38e8..0900034d42 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -3701,7 +3701,7 @@ static uint64_t do_ats_write(CPUARMState *env, uint64_t value, */ format64 = arm_s1_regime_using_lpae_format(env, mmu_idx); - if (arm_feature(env, ARM_FEATURE_EL2) && !arm_aa32_secure_pl1_0(env)) { + if (arm_feature(env, ARM_FEATURE_EL2)) { if (mmu_idx == ARMMMUIdx_E10_0 || mmu_idx == ARMMMUIdx_E10_1 || mmu_idx == ARMMMUIdx_E10_1_PAN) { @@ -3775,11 +3775,13 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) case 0: /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ switch (el) { + case 3: + mmu_idx = ARMMMUIdx_E3; + break; case 2: g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ /* fall through */ case 1: - case 3: if (ri->crm == 9 && arm_pan_enabled(env)) { mmu_idx = ARMMMUIdx_Stage1_E1_PAN; } else { @@ -11860,11 +11862,8 @@ void arm_cpu_do_interrupt(CPUState *cs) uint64_t arm_sctlr(CPUARMState *env, int el) { - if (arm_aa32_secure_pl1_0(env)) { - /* In Secure PL1&0 SCTLR_S is always controlling */ - el = 3; - } else if (el == 0) { - /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ + /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ + if (el == 0) { ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; } @@ -12524,12 +12523,8 @@ int fp_exception_el(CPUARMState *env, int cur_el) return 0; } -/* - * Return the exception level we're running at if this is our mmu_idx. - * s_pl1_0 should be true if this is the AArch32 Secure PL1&0 translation - * regime. - */ -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0) +/* Return the exception level we're running at if this is our mmu_idx */ +int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) { if (mmu_idx & ARM_MMU_IDX_M) { return mmu_idx & ARM_MMU_IDX_M_PRIV; @@ -12541,7 +12536,7 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0) return 0; case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: - return s_pl1_0 ? 3 : 1; + return 1; case ARMMMUIdx_E2: case ARMMMUIdx_E20_2: case ARMMMUIdx_E20_2_PAN: @@ -12579,15 +12574,6 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) idx = ARMMMUIdx_E10_0; } break; - case 3: - /* - * AArch64 EL3 has its own translation regime; AArch32 EL3 - * uses the Secure PL1&0 translation regime. - */ - if (arm_el_is_aa64(env, 3)) { - return ARMMMUIdx_E3; - } - /* fall through */ case 1: if (arm_pan_enabled(env)) { idx = ARMMMUIdx_E10_1_PAN; @@ -12607,6 +12593,8 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) idx = ARMMMUIdx_E2; } break; + case 3: + return ARMMMUIdx_E3; default: g_assert_not_reached(); } diff --git a/target/arm/internals.h b/target/arm/internals.h index fd8f7c82aa..f43d97c59a 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -275,20 +275,6 @@ FIELD(CNTHCTL, CNTPMASK, 19, 1) #define M_FAKE_FSR_NSC_EXEC 0xf /* NS executing in S&NSC memory */ #define M_FAKE_FSR_SFAULT 0xe /* SecureFault INVTRAN, INVEP or AUVIOL */ -/** - * arm_aa32_secure_pl1_0(): Return true if in Secure PL1&0 regime - * - * Return true if the CPU is in the Secure PL1&0 translation regime. - * This requires that EL3 exists and is AArch32 and we are currently - * Secure. If this is the case then the ARMMMUIdx_E10* apply and - * mean we are in EL3, not EL1. - */ -static inline bool arm_aa32_secure_pl1_0(CPUARMState *env) -{ - return arm_feature(env, ARM_FEATURE_EL3) && - !arm_el_is_aa64(env, 3) && arm_is_secure(env); -} - /** * raise_exception: Raise the specified exception. * Raise a guest exception with the specified value, syndrome register @@ -841,12 +827,7 @@ static inline ARMMMUIdx core_to_aa64_mmu_idx(int mmu_idx) return mmu_idx | ARM_MMU_IDX_A; } -/** - * Return the exception level we're running at if our current MMU index - * is @mmu_idx. @s_pl1_0 should be true if this is the AArch32 - * Secure PL1&0 translation regime. - */ -int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx, bool s_pl1_0); +int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx); /* Return the MMU index for a v7M CPU in the specified security state */ ARMMMUIdx arm_v7m_mmu_idx_for_secstate(CPUARMState *env, bool secstate); @@ -941,11 +922,11 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) return 3; case ARMMMUIdx_E10_0: case ARMMMUIdx_Stage1_E0: - case ARMMMUIdx_E10_1: - case ARMMMUIdx_E10_1_PAN: + return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: - return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; + case ARMMMUIdx_E10_1: + case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_MPrivNegPri: case ARMMMUIdx_MUserNegPri: case ARMMMUIdx_MPriv: diff --git a/target/arm/ptw.c b/target/arm/ptw.c index dd40268397..ba3dd38a72 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -3607,11 +3607,7 @@ bool get_phys_addr(CPUARMState *env, vaddr address, case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: case ARMMMUIdx_E2: - if (arm_aa32_secure_pl1_0(env)) { - ss = ARMSS_Secure; - } else { - ss = arm_security_space_below_el3(env); - } + ss = arm_security_space_below_el3(env); break; case ARMMMUIdx_Stage2: /* diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c index bab7822ef6..f03977b4b0 100644 --- a/target/arm/tcg/hflags.c +++ b/target/arm/tcg/hflags.c @@ -198,10 +198,6 @@ static CPUARMTBFlags rebuild_hflags_a32(CPUARMState *env, int fp_el, DP_TBFLAG_A32(flags, SME_TRAP_NONSTREAMING, 1); } - if (arm_aa32_secure_pl1_0(env)) { - DP_TBFLAG_A32(flags, S_PL1_0, 1); - } - return rebuild_hflags_common_32(env, fp_el, mmu_idx, flags); } diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index ec0b1ee252..b2851ea503 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -11690,7 +11690,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->tbii = EX_TBFLAG_A64(tb_flags, TBII); dc->tbid = EX_TBFLAG_A64(tb_flags, TBID); dc->tcma = EX_TBFLAG_A64(tb_flags, TCMA); - dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx, false); + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); #if !defined(CONFIG_USER_ONLY) dc->user = (dc->current_el == 0); #endif diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index e2748ff2bb..c5bc691d92 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -7546,6 +7546,10 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) core_mmu_idx = EX_TBFLAG_ANY(tb_flags, MMUIDX); dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx); + dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx); +#if !defined(CONFIG_USER_ONLY) + dc->user = (dc->current_el == 0); +#endif dc->fp_excp_el = EX_TBFLAG_ANY(tb_flags, FPEXC_EL); dc->align_mem = EX_TBFLAG_ANY(tb_flags, ALIGN_MEM); dc->pstate_il = EX_TBFLAG_ANY(tb_flags, PSTATE__IL); @@ -7576,12 +7580,7 @@ static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) } dc->sme_trap_nonstreaming = EX_TBFLAG_A32(tb_flags, SME_TRAP_NONSTREAMING); - dc->s_pl1_0 = EX_TBFLAG_A32(tb_flags, S_PL1_0); } - dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx, dc->s_pl1_0); -#if !defined(CONFIG_USER_ONLY) - dc->user = (dc->current_el == 0); -#endif dc->lse2 = false; /* applies only to aarch64 */ dc->cp_regs = cpu->cp_regs; dc->features = env->features; diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index 5a2e10d64d..20cd0e851c 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -165,8 +165,6 @@ typedef struct DisasContext { uint8_t gm_blocksize; /* True if the current insn_start has been updated. */ bool insn_start_updated; - /* True if this is the AArch32 Secure PL1&0 translation regime */ - bool s_pl1_0; /* Bottom two bits of XScale c15_cpar coprocessor access control reg */ int c15_cpar; /* Offset from VNCR_EL2 when FEAT_NV2 redirects this reg to memory */ From efbe180ad2ed75d4cc64dfc6fb46a015eef713d1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:58 +0000 Subject: [PATCH 23/31] target/arm: Add new MMU indexes for AArch32 Secure PL1&0 Our current usage of MMU indexes when EL3 is AArch32 is confused. Architecturally, when EL3 is AArch32, all Secure code runs under the Secure PL1&0 translation regime: * code at EL3, which might be Mon, or SVC, or any of the other privileged modes (PL1) * code at EL0 (Secure PL0) This is different from when EL3 is AArch64, in which case EL3 is its own translation regime, and EL1 and EL0 (whether AArch32 or AArch64) have their own regime. We claimed to be mapping Secure PL1 to our ARMMMUIdx_EL3, but didn't do anything special about Secure PL0, which meant it used the same ARMMMUIdx_EL10_0 that NonSecure PL0 does. This resulted in a bug where arm_sctlr() incorrectly picked the NonSecure SCTLR as the controlling register when in Secure PL0, which meant we were spuriously generating alignment faults because we were looking at the wrong SCTLR control bits. The use of ARMMMUIdx_EL3 for Secure PL1 also resulted in the bug that we wouldn't honour the PAN bit for Secure PL1, because there's no equivalent _PAN mmu index for it. Fix this by adding two new MMU indexes: * ARMMMUIdx_E30_0 is for Secure PL0 * ARMMMUIdx_E30_3_PAN is for Secure PL1 when PAN is enabled The existing ARMMMUIdx_E3 is used to mean "Secure PL1 without PAN" (and would be named ARMMMUIdx_E30_3 in an AArch32-centric scheme). These extra two indexes bring us up to the maximum of 16 that the core code can currently support. This commit: * adds the new MMU index handling to the various places where we deal in MMU index values * adds assertions that we aren't AArch32 EL3 in a couple of places that currently use the E10 indexes, to document why they don't also need to handle the E30 indexes * documents in a comment why regime_has_2_ranges() doesn't need updating Notes for backporting: this commit depends on the preceding revert of 4c2c04746932; that revert and this commit should probably be backported to everywhere that we originally backported 4c2c04746932. Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2326 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2588 Signed-off-by: Peter Maydell Tested-by: Thomas Huth Reviewed-by: Richard Henderson Message-id: 20241101142845.1712482-3-peter.maydell@linaro.org --- target/arm/cpu.h | 31 ++++++++++++++++++------------- target/arm/helper.c | 38 ++++++++++++++++++++++++++++++++++---- target/arm/internals.h | 16 ++++++++++++++-- target/arm/ptw.c | 4 ++++ target/arm/tcg/op_helper.c | 14 +++++++++++++- target/arm/tcg/translate.c | 3 +++ 6 files changed, 86 insertions(+), 20 deletions(-) diff --git a/target/arm/cpu.h b/target/arm/cpu.h index 133a87e39a..fb0f217b19 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -2787,8 +2787,7 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * + NonSecure PL1 & 0 stage 1 * + NonSecure PL1 & 0 stage 2 * + NonSecure PL2 - * + Secure PL0 - * + Secure PL1 + * + Secure PL1 & 0 * (reminder: for 32 bit EL3, Secure PL1 is *EL3*, not EL1.) * * For QEMU, an mmu_idx is not quite the same as a translation regime because: @@ -2823,19 +2822,21 @@ bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync); * * This gives us the following list of cases: * - * EL0 EL1&0 stage 1+2 (aka NS PL0) - * EL1 EL1&0 stage 1+2 (aka NS PL1) - * EL1 EL1&0 stage 1+2 +PAN + * EL0 EL1&0 stage 1+2 (aka NS PL0 PL1&0 stage 1+2) + * EL1 EL1&0 stage 1+2 (aka NS PL1 PL1&0 stage 1+2) + * EL1 EL1&0 stage 1+2 +PAN (aka NS PL1 P1&0 stage 1+2 +PAN) * EL0 EL2&0 * EL2 EL2&0 * EL2 EL2&0 +PAN * EL2 (aka NS PL2) - * EL3 (aka S PL1) + * EL3 (aka AArch32 S PL1 PL1&0) + * AArch32 S PL0 PL1&0 (we call this EL30_0) + * AArch32 S PL1 PL1&0 +PAN (we call this EL30_3_PAN) * Stage2 Secure * Stage2 NonSecure * plus one TLB per Physical address space: S, NS, Realm, Root * - * for a total of 14 different mmu_idx. + * for a total of 16 different mmu_idx. * * R profile CPUs have an MPU, but can use the same set of MMU indexes * as A profile. They only need to distinguish EL0 and EL1 (and @@ -2899,6 +2900,8 @@ typedef enum ARMMMUIdx { ARMMMUIdx_E20_2_PAN = 5 | ARM_MMU_IDX_A, ARMMMUIdx_E2 = 6 | ARM_MMU_IDX_A, ARMMMUIdx_E3 = 7 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_0 = 8 | ARM_MMU_IDX_A, + ARMMMUIdx_E30_3_PAN = 9 | ARM_MMU_IDX_A, /* * Used for second stage of an S12 page table walk, or for descriptor @@ -2906,14 +2909,14 @@ typedef enum ARMMMUIdx { * are in use simultaneously for SecureEL2: the security state for * the S2 ptw is selected by the NS bit from the S1 ptw. */ - ARMMMUIdx_Stage2_S = 8 | ARM_MMU_IDX_A, - ARMMMUIdx_Stage2 = 9 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2_S = 10 | ARM_MMU_IDX_A, + ARMMMUIdx_Stage2 = 11 | ARM_MMU_IDX_A, /* TLBs with 1-1 mapping to the physical address spaces. */ - ARMMMUIdx_Phys_S = 10 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_NS = 11 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Root = 12 | ARM_MMU_IDX_A, - ARMMMUIdx_Phys_Realm = 13 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_S = 12 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_NS = 13 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Root = 14 | ARM_MMU_IDX_A, + ARMMMUIdx_Phys_Realm = 15 | ARM_MMU_IDX_A, /* * These are not allocated TLBs and are used only for AT system @@ -2952,6 +2955,8 @@ typedef enum ARMMMUIdxBit { TO_CORE_BIT(E20_2), TO_CORE_BIT(E20_2_PAN), TO_CORE_BIT(E3), + TO_CORE_BIT(E30_0), + TO_CORE_BIT(E30_3_PAN), TO_CORE_BIT(Stage2), TO_CORE_BIT(Stage2_S), diff --git a/target/arm/helper.c b/target/arm/helper.c index 0900034d42..8c4f86f475 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -444,6 +444,9 @@ static int alle1_tlbmask(CPUARMState *env) * Note that the 'ALL' scope must invalidate both stage 1 and * stage 2 translations, whereas most other scopes only invalidate * stage 1 translations. + * + * For AArch32 this is only used for TLBIALLNSNH and VTTBR + * writes, so only needs to apply to NS PL1&0, not S PL1&0. */ return (ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | @@ -3776,7 +3779,11 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* stage 1 current state PL1: ATS1CPR, ATS1CPW, ATS1CPRP, ATS1CPWP */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_E3; + if (ri->crm == 9 && arm_pan_enabled(env)) { + mmu_idx = ARMMMUIdx_E30_3_PAN; + } else { + mmu_idx = ARMMMUIdx_E3; + } break; case 2: g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ @@ -3796,7 +3803,7 @@ static void ats_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value) /* stage 1 current state PL0: ATS1CUR, ATS1CUW */ switch (el) { case 3: - mmu_idx = ARMMMUIdx_E10_0; + mmu_idx = ARMMMUIdx_E30_0; break; case 2: g_assert(ss != ARMSS_Secure); /* ARMv8.4-SecEL2 is 64-bit only */ @@ -4906,11 +4913,14 @@ static int vae1_tlbmask(CPUARMState *env) uint64_t hcr = arm_hcr_el2_eff(env); uint16_t mask; + assert(arm_feature(env, ARM_FEATURE_AARCH64)); + if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { mask = ARMMMUIdxBit_E20_2 | ARMMMUIdxBit_E20_2_PAN | ARMMMUIdxBit_E20_0; } else { + /* This is AArch64 only, so we don't need to touch the EL30_x TLBs */ mask = ARMMMUIdxBit_E10_1 | ARMMMUIdxBit_E10_1_PAN | ARMMMUIdxBit_E10_0; @@ -4949,6 +4959,8 @@ static int vae1_tlbbits(CPUARMState *env, uint64_t addr) uint64_t hcr = arm_hcr_el2_eff(env); ARMMMUIdx mmu_idx; + assert(arm_feature(env, ARM_FEATURE_AARCH64)); + /* Only the regime of the mmu_idx below is significant. */ if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { mmu_idx = ARMMMUIdx_E20_0; @@ -11862,10 +11874,20 @@ void arm_cpu_do_interrupt(CPUState *cs) uint64_t arm_sctlr(CPUARMState *env, int el) { - /* Only EL0 needs to be adjusted for EL1&0 or EL2&0. */ + /* Only EL0 needs to be adjusted for EL1&0 or EL2&0 or EL3&0 */ if (el == 0) { ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); - el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + el = 2; + break; + case ARMMMUIdx_E30_0: + el = 3; + break; + default: + el = 1; + break; + } } return env->cp15.sctlr_el[el]; } @@ -12533,6 +12555,7 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) switch (mmu_idx) { case ARMMMUIdx_E10_0: case ARMMMUIdx_E20_0: + case ARMMMUIdx_E30_0: return 0; case ARMMMUIdx_E10_1: case ARMMMUIdx_E10_1_PAN: @@ -12542,6 +12565,7 @@ int arm_mmu_idx_to_el(ARMMMUIdx mmu_idx) case ARMMMUIdx_E20_2_PAN: return 2; case ARMMMUIdx_E3: + case ARMMMUIdx_E30_3_PAN: return 3; default: g_assert_not_reached(); @@ -12570,6 +12594,9 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) hcr = arm_hcr_el2_eff(env); if ((hcr & (HCR_E2H | HCR_TGE)) == (HCR_E2H | HCR_TGE)) { idx = ARMMMUIdx_E20_0; + } else if (arm_is_secure_below_el3(env) && + !arm_el_is_aa64(env, 3)) { + idx = ARMMMUIdx_E30_0; } else { idx = ARMMMUIdx_E10_0; } @@ -12594,6 +12621,9 @@ ARMMMUIdx arm_mmu_idx_el(CPUARMState *env, int el) } break; case 3: + if (!arm_el_is_aa64(env, 3) && arm_pan_enabled(env)) { + return ARMMMUIdx_E30_3_PAN; + } return ARMMMUIdx_E3; default: g_assert_not_reached(); diff --git a/target/arm/internals.h b/target/arm/internals.h index f43d97c59a..e37f459af3 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -871,7 +871,16 @@ static inline void arm_call_el_change_hook(ARMCPU *cpu) } } -/* Return true if this address translation regime has two ranges. */ +/* + * Return true if this address translation regime has two ranges. + * Note that this will not return the correct answer for AArch32 + * Secure PL1&0 (i.e. mmu indexes E3, E30_0, E30_3_PAN), but it is + * never called from a context where EL3 can be AArch32. (The + * correct return value for ARMMMUIdx_E3 would be different for + * that case, so we can't just make the function return the + * correct value anyway; we would need an extra "bool e3_is_aarch32" + * argument which all the current callsites would pass as 'false'.) + */ static inline bool regime_has_2_ranges(ARMMMUIdx mmu_idx) { switch (mmu_idx) { @@ -896,6 +905,7 @@ static inline bool regime_is_pan(CPUARMState *env, ARMMMUIdx mmu_idx) case ARMMMUIdx_Stage1_E1_PAN: case ARMMMUIdx_E10_1_PAN: case ARMMMUIdx_E20_2_PAN: + case ARMMMUIdx_E30_3_PAN: return true; default: return false; @@ -919,10 +929,11 @@ static inline uint32_t regime_el(CPUARMState *env, ARMMMUIdx mmu_idx) case ARMMMUIdx_E2: return 2; case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: return 3; case ARMMMUIdx_E10_0: case ARMMMUIdx_Stage1_E0: - return arm_el_is_aa64(env, 3) || !arm_is_secure_below_el3(env) ? 1 : 3; case ARMMMUIdx_Stage1_E1: case ARMMMUIdx_Stage1_E1_PAN: case ARMMMUIdx_E10_1: @@ -946,6 +957,7 @@ static inline bool regime_is_user(CPUARMState *env, ARMMMUIdx mmu_idx) switch (mmu_idx) { case ARMMMUIdx_E10_0: case ARMMMUIdx_E20_0: + case ARMMMUIdx_E30_0: case ARMMMUIdx_Stage1_E0: case ARMMMUIdx_MUser: case ARMMMUIdx_MSUser: diff --git a/target/arm/ptw.c b/target/arm/ptw.c index ba3dd38a72..9849949508 100644 --- a/target/arm/ptw.c +++ b/target/arm/ptw.c @@ -280,6 +280,8 @@ static bool regime_translation_disabled(CPUARMState *env, ARMMMUIdx mmu_idx, case ARMMMUIdx_E20_2_PAN: case ARMMMUIdx_E2: case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: break; case ARMMMUIdx_Phys_S: @@ -3635,6 +3637,8 @@ bool get_phys_addr(CPUARMState *env, vaddr address, ss = ARMSS_Secure; break; case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: if (arm_feature(env, ARM_FEATURE_AARCH64) && cpu_isar_feature(aa64_rme, env_archcpu(env))) { ss = ARMSS_Root; diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index c083e5cfb8..1ecb465988 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -912,7 +912,19 @@ void HELPER(tidcp_el0)(CPUARMState *env, uint32_t syndrome) { /* See arm_sctlr(), but we also need the sctlr el. */ ARMMMUIdx mmu_idx = arm_mmu_idx_el(env, 0); - int target_el = mmu_idx == ARMMMUIdx_E20_0 ? 2 : 1; + int target_el; + + switch (mmu_idx) { + case ARMMMUIdx_E20_0: + target_el = 2; + break; + case ARMMMUIdx_E30_0: + target_el = 3; + break; + default: + target_el = 1; + break; + } /* * The bit is not valid unless the target el is aa64, but since the diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c index c5bc691d92..9ee761fc64 100644 --- a/target/arm/tcg/translate.c +++ b/target/arm/tcg/translate.c @@ -228,6 +228,9 @@ static inline int get_a32_user_mem_index(DisasContext *s) */ switch (s->mmu_idx) { case ARMMMUIdx_E3: + case ARMMMUIdx_E30_0: + case ARMMMUIdx_E30_3_PAN: + return arm_to_core_mmu_idx(ARMMMUIdx_E30_0); case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */ case ARMMMUIdx_E10_0: case ARMMMUIdx_E10_1: From e6b2fa1b81ac6b05c4397237c846a295a9857920 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Tue, 5 Nov 2024 10:09:58 +0000 Subject: [PATCH 24/31] target/arm: Fix SVE SDOT/UDOT/USDOT (4-way, indexed) Our implementation of the indexed version of SVE SDOT/UDOT/USDOT got the calculation of the inner loop terminator wrong. Although we correctly account for the element size when we calculate the terminator for the first iteration: intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); we don't do that when we move it forward after the first inner loop completes. The intention is that we process the vector in 128-bit segments, which for a 64-bit element size should mean (1, 2), (3, 4), (5, 6), etc. This bug meant that we would iterate (1, 2), (3, 4, 5, 6), (7, 8, 9, 10) etc and apply the wrong indexed element to some of the operations, and also index off the end of the vector. You don't see this bug if the vector length is small enough that we don't need to iterate the outer loop, i.e. if it is only 128 bits, or if it is the 64-bit special case from AA32/AA64 AdvSIMD. If the vector length is 256 bits then we calculate the right results for the elements in the vector but do index off the end of the vector. Vector lengths greater than 256 bits see wrong answers. The instructions that produce 32-bit results behave correctly. Fix the recalculation of 'segend' for subsequent iterations, and restore a version of the comment that was lost in the refactor of commit 7020ffd656a5 that explains why we only need to clamp segend to opr_sz_n for the first iteration, not the later ones. Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2595 Fixes: 7020ffd656a5 ("target/arm: Macroize helper_gvec_{s,u}dot_idx_{b,h}") Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson Message-id: 20241101185544.2130972-1-peter.maydell@linaro.org --- target/arm/tcg/vec_helper.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c index 22ddb96881..e825d501a2 100644 --- a/target/arm/tcg/vec_helper.c +++ b/target/arm/tcg/vec_helper.c @@ -836,6 +836,13 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ { \ intptr_t i = 0, opr_sz = simd_oprsz(desc); \ intptr_t opr_sz_n = opr_sz / sizeof(TYPED); \ + /* \ + * Special case: opr_sz == 8 from AA64/AA32 advsimd means the \ + * first iteration might not be a full 16 byte segment. But \ + * for vector lengths beyond that this must be SVE and we know \ + * opr_sz is a multiple of 16, so we need not clamp segend \ + * to opr_sz_n when we advance it at the end of the loop. \ + */ \ intptr_t segend = MIN(16 / sizeof(TYPED), opr_sz_n); \ intptr_t index = simd_data(desc); \ TYPED *d = vd, *a = va; \ @@ -853,7 +860,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, uint32_t desc) \ n[i * 4 + 2] * m2 + \ n[i * 4 + 3] * m3); \ } while (++i < segend); \ - segend = i + 4; \ + segend = i + (16 / sizeof(TYPED)); \ } while (i < opr_sz_n); \ clear_tail(d, opr_sz, simd_maxsz(desc)); \ } From a5c02408c1de0a0592d90f153328a4295b6fbca6 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 5 Nov 2024 10:09:59 +0000 Subject: [PATCH 25/31] disas: Fix build against Capstone v6 (again) Like 9971cbac2f3, which set CAPSTONE_AARCH64_COMPAT_HEADER, also set CAPSTONE_SYSTEMZ_COMPAT_HEADER. Fixes the build against capstone v6-alpha. Signed-off-by: Richard Henderson Reviewed-by: Gustavo Romero Message-id: 20241022013047.830273-1-richard.henderson@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- include/disas/capstone.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/disas/capstone.h b/include/disas/capstone.h index a11985151d..c43033f7f6 100644 --- a/include/disas/capstone.h +++ b/include/disas/capstone.h @@ -4,6 +4,7 @@ #ifdef CONFIG_CAPSTONE #define CAPSTONE_AARCH64_COMPAT_HEADER +#define CAPSTONE_SYSTEMZ_COMPAT_HEADER #include #else From e8217c573f9777d90333f5f92aa35d3ae71a7bd6 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 5 Nov 2024 10:09:59 +0000 Subject: [PATCH 26/31] hw/rtc/ds1338: Trace send and receive operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Bernhard Beschow Message-id: 20241103143330.123596-2-shentey@gmail.com Signed-off-by: Peter Maydell --- hw/rtc/ds1338.c | 6 ++++++ hw/rtc/trace-events | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/hw/rtc/ds1338.c b/hw/rtc/ds1338.c index a5fe221418..929a92f7bd 100644 --- a/hw/rtc/ds1338.c +++ b/hw/rtc/ds1338.c @@ -17,6 +17,7 @@ #include "qemu/module.h" #include "qom/object.h" #include "sysemu/rtc.h" +#include "trace.h" /* Size of NVRAM including both the user-accessible area and the * secondary register area. @@ -126,6 +127,9 @@ static uint8_t ds1338_recv(I2CSlave *i2c) uint8_t res; res = s->nvram[s->ptr]; + + trace_ds1338_recv(s->ptr, res); + inc_regptr(s); return res; } @@ -134,6 +138,8 @@ static int ds1338_send(I2CSlave *i2c, uint8_t data) { DS1338State *s = DS1338(i2c); + trace_ds1338_send(s->ptr, data); + if (s->addr_byte) { s->ptr = data & (NVRAM_SIZE - 1); s->addr_byte = false; diff --git a/hw/rtc/trace-events b/hw/rtc/trace-events index ebb311a5b0..8012afe102 100644 --- a/hw/rtc/trace-events +++ b/hw/rtc/trace-events @@ -22,6 +22,10 @@ pl031_set_alarm(uint32_t ticks) "alarm set for %u ticks" aspeed_rtc_read(uint64_t addr, uint64_t value) "addr 0x%02" PRIx64 " value 0x%08" PRIx64 aspeed_rtc_write(uint64_t addr, uint64_t value) "addr 0x%02" PRIx64 " value 0x%08" PRIx64 +# ds1338.c +ds1338_recv(uint32_t addr, uint8_t value) "[0x%" PRIx32 "] -> 0x%02" PRIx8 +ds1338_send(uint32_t addr, uint8_t value) "[0x%" PRIx32 "] <- 0x%02" PRIx8 + # m48t59.c m48txx_nvram_io_read(uint64_t addr, uint64_t value) "io read addr:0x%04" PRIx64 " value:0x%02" PRIx64 m48txx_nvram_io_write(uint64_t addr, uint64_t value) "io write addr:0x%04" PRIx64 " value:0x%02" PRIx64 From afd431e45aac6d0e3529ca2ea02d77be90f14397 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 5 Nov 2024 10:09:59 +0000 Subject: [PATCH 27/31] hw/timer/imx_gpt: Convert DPRINTF to trace events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Bernhard Beschow Message-id: 20241103143330.123596-3-shentey@gmail.com Signed-off-by: Peter Maydell --- hw/timer/imx_gpt.c | 18 +++++------------- hw/timer/trace-events | 6 ++++++ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/hw/timer/imx_gpt.c b/hw/timer/imx_gpt.c index 23b3d79bdb..2663a9d9ef 100644 --- a/hw/timer/imx_gpt.c +++ b/hw/timer/imx_gpt.c @@ -18,19 +18,12 @@ #include "migration/vmstate.h" #include "qemu/module.h" #include "qemu/log.h" +#include "trace.h" #ifndef DEBUG_IMX_GPT #define DEBUG_IMX_GPT 0 #endif -#define DPRINTF(fmt, args...) \ - do { \ - if (DEBUG_IMX_GPT) { \ - fprintf(stderr, "[%s]%s: " fmt , TYPE_IMX_GPT, \ - __func__, ##args); \ - } \ - } while (0) - static const char *imx_gpt_reg_name(uint32_t reg) { switch (reg) { @@ -145,7 +138,7 @@ static void imx_gpt_set_freq(IMXGPTState *s) s->freq = imx_ccm_get_clock_frequency(s->ccm, s->clocks[clksrc]) / (1 + s->pr); - DPRINTF("Setting clksrc %d to frequency %d\n", clksrc, s->freq); + trace_imx_gpt_set_freq(clksrc, s->freq); if (s->freq) { ptimer_set_freq(s->timer, s->freq); @@ -317,7 +310,7 @@ static uint64_t imx_gpt_read(void *opaque, hwaddr offset, unsigned size) break; } - DPRINTF("(%s) = 0x%08x\n", imx_gpt_reg_name(offset >> 2), reg_value); + trace_imx_gpt_read(imx_gpt_reg_name(offset >> 2), reg_value); return reg_value; } @@ -384,8 +377,7 @@ static void imx_gpt_write(void *opaque, hwaddr offset, uint64_t value, IMXGPTState *s = IMX_GPT(opaque); uint32_t oldreg; - DPRINTF("(%s, value = 0x%08x)\n", imx_gpt_reg_name(offset >> 2), - (uint32_t)value); + trace_imx_gpt_write(imx_gpt_reg_name(offset >> 2), (uint32_t)value); switch (offset >> 2) { case 0: @@ -485,7 +477,7 @@ static void imx_gpt_timeout(void *opaque) { IMXGPTState *s = IMX_GPT(opaque); - DPRINTF("\n"); + trace_imx_gpt_timeout(); s->sr |= s->next_int; s->next_int = 0; diff --git a/hw/timer/trace-events b/hw/timer/trace-events index f48a712801..5cfc369fba 100644 --- a/hw/timer/trace-events +++ b/hw/timer/trace-events @@ -49,6 +49,12 @@ cmsdk_apb_dualtimer_read(uint64_t offset, uint64_t data, unsigned size) "CMSDK A cmsdk_apb_dualtimer_write(uint64_t offset, uint64_t data, unsigned size) "CMSDK APB dualtimer write: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u" cmsdk_apb_dualtimer_reset(void) "CMSDK APB dualtimer: reset" +# imx_gpt.c +imx_gpt_set_freq(uint32_t clksrc, uint32_t freq) "Setting clksrc %u to %u Hz" +imx_gpt_read(const char *name, uint64_t value) "%s -> 0x%08" PRIx64 +imx_gpt_write(const char *name, uint64_t value) "%s <- 0x%08" PRIx64 +imx_gpt_timeout(void) "" + # npcm7xx_timer.c npcm7xx_timer_read(const char *id, uint64_t offset, uint64_t value) " %s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 npcm7xx_timer_write(const char *id, uint64_t offset, uint64_t value) "%s offset: 0x%04" PRIx64 " value 0x%08" PRIx64 From fe06088b3c5b3bd1a31b499db38b9542deaa2a3e Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 5 Nov 2024 10:10:00 +0000 Subject: [PATCH 28/31] hw/watchdog/wdt_imx2: Remove redundant assignment The same statement is executed unconditionally right before the if statement. Cc: Guenter Roeck Reviewed-by: Guenter Roeck Signed-off-by: Bernhard Beschow Reviewed-by: Richard Henderson Message-id: 20241103143330.123596-4-shentey@gmail.com Signed-off-by: Peter Maydell --- hw/watchdog/wdt_imx2.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/watchdog/wdt_imx2.c b/hw/watchdog/wdt_imx2.c index be63d421da..8162d58afa 100644 --- a/hw/watchdog/wdt_imx2.c +++ b/hw/watchdog/wdt_imx2.c @@ -39,7 +39,6 @@ static void imx2_wdt_expired(void *opaque) /* Perform watchdog action if watchdog is enabled */ if (s->wcr & IMX2_WDT_WCR_WDE) { - s->wrsr = IMX2_WDT_WRSR_TOUT; watchdog_perform_action(); } } From 3647dca9fbf4fb267c401cdd94519b7a60af0b0b Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Tue, 5 Nov 2024 10:10:00 +0000 Subject: [PATCH 29/31] hw/sensor/tmp105: Convert printf() to trace event, add tracing for read/write access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit printf() unconditionally prints to the console which disturbs `-serial stdio`. Fix that by converting into a trace event. While at it, add some tracing for read and write access. Fixes: 7e7c5e4c1ba5 "Nokia N800 machine support (ARM)." Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-id: 20241103143330.123596-5-shentey@gmail.com Signed-off-by: Peter Maydell --- hw/sensor/tmp105.c | 7 ++++++- hw/sensor/trace-events | 6 ++++++ hw/sensor/trace.h | 1 + meson.build | 1 + 4 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 hw/sensor/trace-events create mode 100644 hw/sensor/trace.h diff --git a/hw/sensor/tmp105.c b/hw/sensor/tmp105.c index 9d7b911f59..ef2824f3e1 100644 --- a/hw/sensor/tmp105.c +++ b/hw/sensor/tmp105.c @@ -27,6 +27,7 @@ #include "qapi/visitor.h" #include "qemu/module.h" #include "hw/registerfields.h" +#include "trace.h" FIELD(CONFIG, SHUTDOWN_MODE, 0, 1) FIELD(CONFIG, THERMOSTAT_MODE, 1, 1) @@ -150,17 +151,21 @@ static void tmp105_read(TMP105State *s) s->buf[s->len++] = ((uint16_t) s->limit[1]) >> 0; break; } + + trace_tmp105_read(s->i2c.address, s->pointer); } static void tmp105_write(TMP105State *s) { + trace_tmp105_write(s->i2c.address, s->pointer); + switch (s->pointer & 3) { case TMP105_REG_TEMPERATURE: break; case TMP105_REG_CONFIG: if (FIELD_EX8(s->buf[0] & ~s->config, CONFIG, SHUTDOWN_MODE)) { - printf("%s: TMP105 shutdown\n", __func__); + trace_tmp105_write_shutdown(s->i2c.address); } s->config = FIELD_DP8(s->buf[0], CONFIG, ONE_SHOT, 0); s->faults = tmp105_faultq[FIELD_EX8(s->config, CONFIG, FAULT_QUEUE)]; diff --git a/hw/sensor/trace-events b/hw/sensor/trace-events new file mode 100644 index 0000000000..a3fe54fa6d --- /dev/null +++ b/hw/sensor/trace-events @@ -0,0 +1,6 @@ +# See docs/devel/tracing.rst for syntax documentation. + +# tmp105.c +tmp105_read(uint8_t dev, uint8_t addr) "device: 0x%02x, addr: 0x%02x" +tmp105_write(uint8_t dev, uint8_t addr) "device: 0x%02x, addr 0x%02x" +tmp105_write_shutdown(uint8_t dev) "device: 0x%02x" diff --git a/hw/sensor/trace.h b/hw/sensor/trace.h new file mode 100644 index 0000000000..e4721560b0 --- /dev/null +++ b/hw/sensor/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_sensor.h" diff --git a/meson.build b/meson.build index c386593c52..4262e1e84a 100644 --- a/meson.build +++ b/meson.build @@ -3484,6 +3484,7 @@ if have_system 'hw/s390x', 'hw/scsi', 'hw/sd', + 'hw/sensor', 'hw/sh4', 'hw/sparc', 'hw/sparc64', From ab4b56d981d753eb2ecc610d2b5d5689716aae9c Mon Sep 17 00:00:00 2001 From: Nabih Estefan Date: Tue, 5 Nov 2024 10:10:00 +0000 Subject: [PATCH 30/31] hw/net/npcm_gmac: Change error log to trace event MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert the LOG_GUEST_ERROR for the "tx descriptor is owned by software" to a trace message. This condition is normal when there is there is nothing to transmit, and we would otherwise spam the logs with it in that situation. Signed-off-by: Nabih Estefan Signed-off-by: Roque Arcudia Hernandez Reviewed-by: Philippe Mathieu-Daudé Message-id: 20241014184847.1594056-1-roqueh@google.com [PMM: tweaked commit message] Signed-off-by: Peter Maydell --- hw/net/npcm_gmac.c | 5 ++--- hw/net/trace-events | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/net/npcm_gmac.c b/hw/net/npcm_gmac.c index 6fa6bece61..685905f9e2 100644 --- a/hw/net/npcm_gmac.c +++ b/hw/net/npcm_gmac.c @@ -546,9 +546,8 @@ static void gmac_try_send_next_packet(NPCMGMACState *gmac) /* 1 = DMA Owned, 0 = Software Owned */ if (!(tx_desc.tdes0 & TX_DESC_TDES0_OWN)) { - qemu_log_mask(LOG_GUEST_ERROR, - "TX Descriptor @ 0x%x is owned by software\n", - desc_addr); + trace_npcm_gmac_tx_desc_owner(DEVICE(gmac)->canonical_path, + desc_addr); gmac->regs[R_NPCM_DMA_STATUS] |= NPCM_DMA_STATUS_TU; gmac_dma_set_state(gmac, NPCM_DMA_STATUS_TX_PROCESS_STATE_SHIFT, NPCM_DMA_STATUS_TX_SUSPENDED_STATE); diff --git a/hw/net/trace-events b/hw/net/trace-events index 91a3d0c054..d0f1d8c0fb 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -478,6 +478,7 @@ npcm_gmac_packet_received(const char* name, uint32_t len) "%s: Reception finishe npcm_gmac_packet_sent(const char* name, uint16_t len) "%s: TX packet sent!, length: 0x%04" PRIX16 npcm_gmac_debug_desc_data(const char* name, void* addr, uint32_t des0, uint32_t des1, uint32_t des2, uint32_t des3)"%s: Address: %p Descriptor 0: 0x%04" PRIX32 " Descriptor 1: 0x%04" PRIX32 "Descriptor 2: 0x%04" PRIX32 " Descriptor 3: 0x%04" PRIX32 npcm_gmac_packet_tx_desc_data(const char* name, uint32_t tdes0, uint32_t tdes1) "%s: Tdes0: 0x%04" PRIX32 " Tdes1: 0x%04" PRIX32 +npcm_gmac_tx_desc_owner(const char* name, uint32_t desc_addr) "%s: TX Descriptor @0x%04" PRIX32 " is owned by software" # npcm_pcs.c npcm_pcs_reg_read(const char *name, uint16_t indirect_access_baes, uint64_t offset, uint16_t value) "%s: IND: 0x%02" PRIx16 " offset: 0x%04" PRIx64 " value: 0x%04" PRIx16 From 374cdc8efe4a039510cca47e8399d54a1aeb4f2d Mon Sep 17 00:00:00 2001 From: Gustavo Romero Date: Tue, 5 Nov 2024 10:10:00 +0000 Subject: [PATCH 31/31] target/arm: Enable FEAT_CMOW for -cpu max FEAT_CMOW introduces support for controlling cache maintenance instructions executed in EL0/1 and is mandatory from Armv8.8. On real hardware, the main use for this feature is to prevent processes from invalidating or flushing cache lines for addresses they only have read permission, which can impact the performance of other processes. QEMU implements all cache instructions as NOPs, and, according to rule [1], which states that generating any Permission fault when a cache instruction is implemented as a NOP is implementation-defined, no Permission fault is generated for any cache instruction when it lacks read and write permissions. QEMU does not model any cache topology, so the PoU and PoC are before any cache, and rules [2] apply. These rules state that generating any MMU fault for cache instructions in this topology is also implementation-defined. Therefore, for FEAT_CMOW, we do not generate any MMU faults either, instead, we only advertise it in the feature register. [1] Rule R_HGLYG of section D8.14.3, Arm ARM K.a. [2] Rules R_MZTNR and R_DNZYL of section D8.14.3, Arm ARM K.a. Signed-off-by: Gustavo Romero Reviewed-by: Richard Henderson Message-id: 20241104142606.941638-1-gustavo.romero@linaro.org Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- docs/system/arm/emulation.rst | 1 + target/arm/cpu-features.h | 5 +++++ target/arm/cpu.h | 1 + target/arm/helper.c | 5 +++++ target/arm/tcg/cpu64.c | 1 + 5 files changed, 13 insertions(+) diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index 35f52a54b1..a2a388f091 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -26,6 +26,7 @@ the following architecture extensions: - FEAT_BF16 (AArch64 BFloat16 instructions) - FEAT_BTI (Branch Target Identification) - FEAT_CCIDX (Extended cache index) +- FEAT_CMOW (Control for cache maintenance permission) - FEAT_CRC32 (CRC32 instructions) - FEAT_Crypto (Cryptographic Extension) - FEAT_CSV2 (Cache speculation variant 2) diff --git a/target/arm/cpu-features.h b/target/arm/cpu-features.h index 04ce281826..e806f138b8 100644 --- a/target/arm/cpu-features.h +++ b/target/arm/cpu-features.h @@ -802,6 +802,11 @@ static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0; } +static inline bool isar_feature_aa64_cmow(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, CMOW) != 0; +} + static inline bool isar_feature_aa64_hafs(const ARMISARegisters *id) { return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HAFDBS) != 0; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index fb0f217b19..d86e641280 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1367,6 +1367,7 @@ void pmu_init(ARMCPU *cpu); #define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */ #define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */ #define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */ +#define SCTLR_CMOW (1ULL << 32) /* FEAT_CMOW */ #define SCTLR_MSCEN (1ULL << 33) /* FEAT_MOPS */ #define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */ #define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */ diff --git a/target/arm/helper.c b/target/arm/helper.c index 8c4f86f475..f38eb054c0 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -6229,6 +6229,11 @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, if (cpu_isar_feature(aa64_nmi, cpu)) { valid_mask |= HCRX_TALLINT | HCRX_VINMI | HCRX_VFNMI; } + /* FEAT_CMOW adds CMOW */ + + if (cpu_isar_feature(aa64_cmow, cpu)) { + valid_mask |= HCRX_CMOW; + } /* Clear RES0 bits. */ env->cp15.hcrx_el2 = value & valid_mask; diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 0168920828..2963d7510f 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -1218,6 +1218,7 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */ t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */ t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */ + t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */ cpu->isar.id_aa64mmfr1 = t; t = cpu->isar.id_aa64mmfr2;