target-arm queue:
 * gdbstub: Send a reply to the vKill packet
 * Improve codegen for neon min/max and saturating arithmetic
 * Fix a bug in clearing FPSCR exception status bits
 * hw/arm/armsse: Fix miswiring of expansion IRQs
 * hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
 * MAINTAINERS: Remove Peter Crosthwaite from various entries
 * arm: Allow system registers for KVM guests to be changed by QEMU code
 * linux-user: support HWCAP_CPUID which exposes ID registers to user code
 * Fix bug in 128-bit cmpxchg for BE Arm guests
 * Implement (no-op) HACR_EL2
 * Fix CRn to be 14 for PMEVTYPER/PMEVCNTR

-----BEGIN PGP SIGNATURE-----
iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAlxmkiIZHHBldGVyLm1h
eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3vzlD/wKPD9Hg45ieqwFHA5ovKFr
31gNuDIUZpGf8ohKCUF/SXZkq6T81eWFn2+J60UQFyAw0owGiqn24Er7LKJQNqBw
kwFGAwBUZ2Kbs4qg+RRlHYpC+IZf5MIRbLSWdy9neAyewRniekzR1Gw5+NamY4nw
rJLlSezL2cxG8nKea/HKM1gmgk3wUxL4mfbC+ZLmY87ZTYzRb38q1V4iU2MV59FD
U23+s5kJ21/LcQwWLFJE6Dr9AuGHEIsFxWvgFbhB61tR5wX2Vv19dFS12KJibII2
7IX6V+mo+aeQCp0GAMOe9yi+Xd2txgiAZ/mV0JnYykU19oEFFlDGtnptVGNU0paM
rn0KrdO2mBIuIw995yFPkUMtpU1EnrG7e7XtDGdrlXxeRwUfD+okLOUSsTTYPYiF
uPhAu6SRvGKZ1qYFjR5v2WnUsITMbH/I+oIh+unIyxm+lO/zBEybPvWpCXgDE36D
J30Ol1j/3UNcQH9SnvCTvFopH16A8nJIqYB5rJ2SclUPr+aTWdYI5Lrs+opgrN+H
LeOStYkRXibgwFX3L5ZdDEGhoyuIKLpPVlaTBRM9Y3ld5huBd02m93L4z1rrdd/g
+q0Zg4X28u3Fq1wBx/bHhkwH7+Oa9f0D0dOSctQGCHzEDYHt4dBTx2lXqJI4Etab
fzMeuoqBo33lSZMTBLXsAA==
=xqVQ
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190215' into staging

target-arm queue:
 * gdbstub: Send a reply to the vKill packet
 * Improve codegen for neon min/max and saturating arithmetic
 * Fix a bug in clearing FPSCR exception status bits
 * hw/arm/armsse: Fix miswiring of expansion IRQs
 * hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
 * MAINTAINERS: Remove Peter Crosthwaite from various entries
 * arm: Allow system registers for KVM guests to be changed by QEMU code
 * linux-user: support HWCAP_CPUID which exposes ID registers to user code
 * Fix bug in 128-bit cmpxchg for BE Arm guests
 * Implement (no-op) HACR_EL2
 * Fix CRn to be 14 for PMEVTYPER/PMEVCNTR

# gpg: Signature made Fri 15 Feb 2019 10:19:14 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20190215: (25 commits)
  gdbstub: Send a reply to the vKill packet.
  target/arm: Add missing clear_tail calls
  target/arm: Use vector operations for saturation
  target/arm: Split out FPSCR.QC to a vector field
  target/arm: Fix set of bits kept in xregs[ARM_VFP_FPSCR]
  target/arm: Split out flags setting from vfp compares
  target/arm: Fix arm_cpu_dump_state vs FPSCR
  target/arm: Fix vfp_gdb_get/set_reg vs FPSCR
  target/arm: Remove neon min/max helpers
  target/arm: Use tcg integer min/max primitives for neon
  target/arm: Use vector minmax expanders for aarch32
  target/arm: Use vector minmax expanders for aarch64
  target/arm: Rely on optimization within tcg_gen_gvec_or
  hw/arm/armsse: Fix miswiring of expansion IRQs
  hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
  MAINTAINERS: Remove Peter Crosthwaite from various entries
  arm: Allow system registers for KVM guests to be changed by QEMU code
  linux-user/elfload: enable HWCAP_CPUID for AArch64
  target/arm: expose remaining CPUID registers as RAZ
  target/arm: expose MPIDR_EL1 to userspace
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 81dbcfa9e1
@@ -110,7 +110,6 @@ Guest CPU cores (TCG):
----------------------
Overall
L: qemu-devel@nongnu.org
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Richard Henderson <rth@twiddle.net>
R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
@@ -1345,7 +1344,6 @@ F: tests/virtio-scsi-test.c
T: git https://github.com/bonzini/qemu.git scsi-next

SSI
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/ssi/*
@@ -1356,7 +1354,6 @@ F: tests/m25p80-test.c

Xilinx SPI
M: Alistair Francis <alistair@alistair23.me>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
S: Maintained
F: hw/ssi/xilinx_*

@@ -1766,7 +1763,6 @@ F: qom/cpu.c
F: include/qom/cpu.h

Device Tree
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: device_tree.c
@@ -1361,6 +1361,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
break;
} else if (strncmp(p, "Kill;", 5) == 0) {
/* Kill the target */
put_packet(s, "OK");
error_report("QEMU: Terminated via GDBstub");
exit(0);
} else {
@@ -565,7 +565,7 @@ static void armsse_realize(DeviceState *dev, Error **errp)
/* Connect EXP_IRQ/EXP_CPUn_IRQ GPIOs to the NVIC's lines 32 and up */
s->exp_irqs[i] = g_new(qemu_irq, s->exp_numirq);
for (j = 0; j < s->exp_numirq; j++) {
s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, i + 32);
s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, j + 32);
}
if (i == 0) {
gpioname = g_strdup("EXP_IRQ");
@@ -1841,7 +1841,7 @@ static MemTxResult nvic_sysreg_read(void *opaque, hwaddr addr,
}
}
break;
case 0xd18: /* System Handler Priority (SHPR1) */
case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
val = 0;
break;
@@ -1956,7 +1956,7 @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
}
nvic_irq_update(s);
return MEMTX_OK;
case 0xd18: /* System Handler Priority (SHPR1) */
case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
return MEMTX_OK;
}
@@ -580,6 +580,7 @@ static uint32_t get_elf_hwcap(void)

hwcaps |= ARM_HWCAP_A64_FP;
hwcaps |= ARM_HWCAP_A64_ASIMD;
hwcaps |= ARM_HWCAP_A64_CPUID;

/* probe for the extra features */
#define GET_FEATURE_ID(feat, hwcap) \
@@ -577,11 +577,13 @@ typedef struct CPUARMState {
ARMPredicateReg preg_tmp;
#endif

uint32_t xregs[16];
/* We store these fpcsr fields separately for convenience. */
uint32_t qc[4] QEMU_ALIGNED(16);
int vec_len;
int vec_stride;

uint32_t xregs[16];

/* Scratch space for aa32 neon expansion. */
uint32_t scratch[8];

@@ -1427,6 +1429,7 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
#define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */
#define FPCR_DN (1 << 25) /* Default NaN enable bit */
#define FPCR_QC (1 << 27) /* Cumulative saturation bit */

static inline uint32_t vfp_get_fpsr(CPUARMState *env)
{
@@ -2226,6 +2229,18 @@ static inline bool cptype_valid(int cptype)
#define PL0_R (0x02 | PL1_R)
#define PL0_W (0x01 | PL1_W)

/*
 * For user-mode some registers are accessible to EL0 via a kernel
 * trap-and-emulate ABI. In this case we define the read permissions
 * as actually being PL0_R. However some bits of any given register
 * may still be masked.
 */
#ifdef CONFIG_USER_ONLY
#define PL0U_R PL0_R
#else
#define PL0U_R PL1_R
#endif

#define PL3_RW (PL3_R | PL3_W)
#define PL2_RW (PL2_R | PL2_W)
#define PL1_RW (PL1_R | PL1_W)
@@ -2452,6 +2467,30 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
}
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);

/*
 * Definition of an ARM co-processor register as viewed from
 * userspace. This is used for presenting sanitised versions of
 * registers to userspace when emulating the Linux AArch64 CPU
 * ID/feature ABI (advertised as HWCAP_CPUID).
 */
typedef struct ARMCPRegUserSpaceInfo {
/* Name of register */
const char *name;

/* Is the name actually a glob pattern */
bool is_glob;

/* Only some bits are exported to user space */
uint64_t exported_bits;

/* Fixed bits are applied after the mask */
uint64_t fixed_bits;
} ARMCPRegUserSpaceInfo;

#define REGUSERINFO_SENTINEL { .name = NULL }

void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods);

/* CPWriteFn that can be used to implement writes-ignored behaviour */
void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value);
@@ -2499,18 +2538,25 @@ bool write_list_to_cpustate(ARMCPU *cpu);
/**
 * write_cpustate_to_list:
 * @cpu: ARMCPU
 * @kvm_sync: true if this is for syncing back to KVM
 *
 * For each register listed in the ARMCPU cpreg_indexes list, write
 * its value from the ARMCPUState structure into the cpreg_values list.
 * This is used to copy info from TCG's working data structures into
 * KVM or for outbound migration.
 *
 * @kvm_sync is true if we are doing this in order to sync the
 * register state back to KVM. In this case we will only update
 * values in the list if the previous list->cpustate sync actually
 * successfully wrote the CPU state. Otherwise we will keep the value
 * that is in the list.
 *
 * Returns: true if all register values were read correctly,
 * false if some register was unknown or could not be read.
 * Note that we do not stop early on failure -- we will attempt
 * reading all registers in the list.
 */
bool write_cpustate_to_list(ARMCPU *cpu);
bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);

#define ARM_CPUID_TI915T 0x54029152
#define ARM_CPUID_TI925T 0x54029252
@@ -583,8 +583,8 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
 * High and low need to be switched here because this is not actually a
 * 128bit store but two doublewords stored consecutively
 */
Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
Int128 newv = int128_make128(new_lo, new_hi);
Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
Int128 newv = int128_make128(new_hi, new_lo);
Int128 oldv;
uintptr_t ra = GETPC();
uint64_t o0, o1;
@ -81,7 +81,7 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
|
||||
}
|
||||
switch (reg - nregs) {
|
||||
case 0: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSID]); return 4;
|
||||
case 1: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSCR]); return 4;
|
||||
case 1: stl_p(buf, vfp_get_fpscr(env)); return 4;
|
||||
case 2: stl_p(buf, env->vfp.xregs[ARM_VFP_FPEXC]); return 4;
|
||||
}
|
||||
return 0;
|
||||
@ -107,7 +107,7 @@ static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
|
||||
}
|
||||
switch (reg - nregs) {
|
||||
case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4;
|
||||
case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4;
|
||||
case 1: vfp_set_fpscr(env, ldl_p(buf)); return 4;
|
||||
case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4;
|
||||
}
|
||||
return 0;
|
||||
@ -264,7 +264,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_cpustate_to_list(ARMCPU *cpu)
|
||||
bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync)
|
||||
{
|
||||
/* Write the coprocessor state from cpu->env to the (index,value) list. */
|
||||
int i;
|
||||
@ -273,6 +273,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
|
||||
for (i = 0; i < cpu->cpreg_array_len; i++) {
|
||||
uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
|
||||
const ARMCPRegInfo *ri;
|
||||
uint64_t newval;
|
||||
|
||||
ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
|
||||
if (!ri) {
|
||||
@ -282,7 +283,29 @@ bool write_cpustate_to_list(ARMCPU *cpu)
|
||||
if (ri->type & ARM_CP_NO_RAW) {
|
||||
continue;
|
||||
}
|
||||
cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri);
|
||||
|
||||
newval = read_raw_cp_reg(&cpu->env, ri);
|
||||
if (kvm_sync) {
|
||||
/*
|
||||
* Only sync if the previous list->cpustate sync succeeded.
|
||||
* Rather than tracking the success/failure state for every
|
||||
* item in the list, we just recheck "does the raw write we must
|
||||
* have made in write_list_to_cpustate() read back OK" here.
|
||||
*/
|
||||
uint64_t oldval = cpu->cpreg_values[i];
|
||||
|
||||
if (oldval == newval) {
|
||||
continue;
|
||||
}
|
||||
|
||||
write_raw_cp_reg(&cpu->env, ri, oldval);
|
||||
if (read_raw_cp_reg(&cpu->env, ri) != oldval) {
|
||||
continue;
|
||||
}
|
||||
|
||||
write_raw_cp_reg(&cpu->env, ri, newval);
|
||||
}
|
||||
cpu->cpreg_values[i] = newval;
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
@ -3657,13 +3680,6 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
|
||||
return mpidr_read_val(env);
|
||||
}
|
||||
|
||||
static const ARMCPRegInfo mpidr_cp_reginfo[] = {
|
||||
{ .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
|
||||
.access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
|
||||
static const ARMCPRegInfo lpae_cp_reginfo[] = {
|
||||
/* NOP AMAIR0/1 */
|
||||
{ .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
|
||||
@ -4434,6 +4450,9 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
|
||||
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
|
||||
.access = PL2_RW,
|
||||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
|
||||
.access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
|
||||
.access = PL2_RW,
|
||||
@ -4666,6 +4685,9 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
|
||||
.cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
|
||||
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
|
||||
.writefn = hcr_writelow },
|
||||
{ .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
|
||||
.access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
|
||||
.type = ARM_CP_ALIAS,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
|
||||
@ -5855,25 +5877,25 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i);
|
||||
char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i);
|
||||
ARMCPRegInfo pmev_regs[] = {
|
||||
{ .name = pmevcntr_name, .cp = 15, .crn = 15,
|
||||
{ .name = pmevcntr_name, .cp = 15, .crn = 14,
|
||||
.crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
|
||||
.access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
|
||||
.readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
|
||||
.accessfn = pmreg_access },
|
||||
{ .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64,
|
||||
.opc0 = 3, .opc1 = 3, .crn = 15, .crm = 8 | (3 & (i >> 3)),
|
||||
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)),
|
||||
.opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
|
||||
.type = ARM_CP_IO,
|
||||
.readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
|
||||
.raw_readfn = pmevcntr_rawread,
|
||||
.raw_writefn = pmevcntr_rawwrite },
|
||||
{ .name = pmevtyper_name, .cp = 15, .crn = 15,
|
||||
{ .name = pmevtyper_name, .cp = 15, .crn = 14,
|
||||
.crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
|
||||
.access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
|
||||
.readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
|
||||
.accessfn = pmreg_access },
|
||||
{ .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64,
|
||||
.opc0 = 3, .opc1 = 3, .crn = 15, .crm = 12 | (3 & (i >> 3)),
|
||||
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)),
|
||||
.opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
|
||||
.type = ARM_CP_IO,
|
||||
.readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
|
||||
@ -6103,6 +6125,38 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
.resetvalue = cpu->pmceid1 },
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
ARMCPRegUserSpaceInfo v8_user_idregs[] = {
|
||||
{ .name = "ID_AA64PFR0_EL1",
|
||||
.exported_bits = 0x000f000f00ff0000,
|
||||
.fixed_bits = 0x0000000000000011 },
|
||||
{ .name = "ID_AA64PFR1_EL1",
|
||||
.exported_bits = 0x00000000000000f0 },
|
||||
{ .name = "ID_AA64PFR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64ZFR0_EL1" },
|
||||
{ .name = "ID_AA64MMFR0_EL1",
|
||||
.fixed_bits = 0x00000000ff000000 },
|
||||
{ .name = "ID_AA64MMFR1_EL1" },
|
||||
{ .name = "ID_AA64MMFR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64DFR0_EL1",
|
||||
.fixed_bits = 0x0000000000000006 },
|
||||
{ .name = "ID_AA64DFR1_EL1" },
|
||||
{ .name = "ID_AA64DFR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64AFR*",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64ISAR0_EL1",
|
||||
.exported_bits = 0x00fffffff0fffff0 },
|
||||
{ .name = "ID_AA64ISAR1_EL1",
|
||||
.exported_bits = 0x000000f0ffffffff },
|
||||
{ .name = "ID_AA64ISAR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
REGUSERINFO_SENTINEL
|
||||
};
|
||||
modify_arm_cp_regs(v8_idregs, v8_user_idregs);
|
||||
#endif
|
||||
/* RVBAR_EL1 is only implemented if EL1 is the highest EL */
|
||||
if (!arm_feature(env, ARM_FEATURE_EL3) &&
|
||||
!arm_feature(env, ARM_FEATURE_EL2)) {
|
||||
@ -6379,6 +6433,15 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
.opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W,
|
||||
.type = ARM_CP_NOP | ARM_CP_OVERRIDE
|
||||
};
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = {
|
||||
{ .name = "MIDR_EL1",
|
||||
.exported_bits = 0x00000000ffffffff },
|
||||
{ .name = "REVIDR_EL1" },
|
||||
REGUSERINFO_SENTINEL
|
||||
};
|
||||
modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo);
|
||||
#endif
|
||||
if (arm_feature(env, ARM_FEATURE_OMAPCP) ||
|
||||
arm_feature(env, ARM_FEATURE_STRONGARM)) {
|
||||
ARMCPRegInfo *r;
|
||||
@ -6412,6 +6475,20 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
}
|
||||
|
||||
if (arm_feature(env, ARM_FEATURE_MPIDR)) {
|
||||
ARMCPRegInfo mpidr_cp_reginfo[] = {
|
||||
{ .name = "MPIDR_EL1", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
|
||||
.access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = {
|
||||
{ .name = "MPIDR_EL1",
|
||||
.fixed_bits = 0x0000000080000000 },
|
||||
REGUSERINFO_SENTINEL
|
||||
};
|
||||
modify_arm_cp_regs(mpidr_cp_reginfo, mpidr_user_cp_reginfo);
|
||||
#endif
|
||||
define_arm_cp_regs(cpu, mpidr_cp_reginfo);
|
||||
}
|
||||
|
||||
@ -6851,7 +6928,11 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
|
||||
if (r->state != ARM_CP_STATE_AA32) {
|
||||
int mask = 0;
|
||||
switch (r->opc1) {
|
||||
case 0: case 1: case 2:
|
||||
case 0:
|
||||
/* min_EL EL1, but some accessible to EL0 via kernel ABI */
|
||||
mask = PL0U_R | PL1_RW;
|
||||
break;
|
||||
case 1: case 2:
|
||||
/* min_EL EL1 */
|
||||
mask = PL1_RW;
|
||||
break;
|
||||
@ -6956,6 +7037,44 @@ void define_arm_cp_regs_with_opaque(ARMCPU *cpu,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Modify ARMCPRegInfo for access from userspace.
|
||||
*
|
||||
* This is a data driven modification directed by
|
||||
* ARMCPRegUserSpaceInfo. All registers become ARM_CP_CONST as
|
||||
* user-space cannot alter any values and dynamic values pertaining to
|
||||
* execution state are hidden from user space view anyway.
|
||||
*/
|
||||
void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods)
|
||||
{
|
||||
const ARMCPRegUserSpaceInfo *m;
|
||||
ARMCPRegInfo *r;
|
||||
|
||||
for (m = mods; m->name; m++) {
|
||||
GPatternSpec *pat = NULL;
|
||||
if (m->is_glob) {
|
||||
pat = g_pattern_spec_new(m->name);
|
||||
}
|
||||
for (r = regs; r->type != ARM_CP_SENTINEL; r++) {
|
||||
if (pat && g_pattern_match_string(pat, r->name)) {
|
||||
r->type = ARM_CP_CONST;
|
||||
r->access = PL0U_R;
|
||||
r->resetvalue = 0;
|
||||
/* continue */
|
||||
} else if (strcmp(r->name, m->name) == 0) {
|
||||
r->type = ARM_CP_CONST;
|
||||
r->access = PL0U_R;
|
||||
r->resetvalue &= m->exported_bits;
|
||||
r->resetvalue |= m->fixed_bits;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pat) {
|
||||
g_pattern_spec_free(pat);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp)
|
||||
{
|
||||
return g_hash_table_lookup(cpregs, &encoded_cp);
|
||||
@ -12585,10 +12704,9 @@ static inline int vfp_exceptbits_from_host(int host_bits)
|
||||
|
||||
uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
|
||||
{
|
||||
int i;
|
||||
uint32_t fpscr;
|
||||
uint32_t i, fpscr;
|
||||
|
||||
fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff)
|
||||
fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
|
||||
| (env->vfp.vec_len << 16)
|
||||
| (env->vfp.vec_stride << 20);
|
||||
|
||||
@ -12597,8 +12715,11 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
|
||||
/* FZ16 does not generate an input denormal exception. */
|
||||
i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
|
||||
& ~float_flag_input_denormal);
|
||||
|
||||
fpscr |= vfp_exceptbits_from_host(i);
|
||||
|
||||
i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
|
||||
fpscr |= i ? FPCR_QC : 0;
|
||||
|
||||
return fpscr;
|
||||
}
|
||||
|
||||
@ -12630,7 +12751,7 @@ static inline int vfp_exceptbits_to_host(int target_bits)
|
||||
void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
|
||||
{
|
||||
int i;
|
||||
uint32_t changed;
|
||||
uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
|
||||
|
||||
/* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
|
||||
if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
|
||||
@ -12639,15 +12760,25 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
|
||||
|
||||
/*
|
||||
* We don't implement trapped exception handling, so the
|
||||
* trap enable bits are all RAZ/WI (not RES0!)
|
||||
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
|
||||
*
|
||||
* If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
|
||||
* (which are stored in fp_status), and the other RES0 bits
|
||||
* in between, then we clear all of the low 16 bits.
|
||||
*/
|
||||
val &= ~(FPCR_IDE | FPCR_IXE | FPCR_UFE | FPCR_OFE | FPCR_DZE | FPCR_IOE);
|
||||
|
||||
changed = env->vfp.xregs[ARM_VFP_FPSCR];
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff);
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
|
||||
env->vfp.vec_len = (val >> 16) & 7;
|
||||
env->vfp.vec_stride = (val >> 20) & 3;
|
||||
|
||||
/*
|
||||
* The bit we set within fpscr_q is arbitrary; the register as a
|
||||
* whole being zero/non-zero is what counts.
|
||||
*/
|
||||
env->vfp.qc[0] = val & FPCR_QC;
|
||||
env->vfp.qc[1] = 0;
|
||||
env->vfp.qc[2] = 0;
|
||||
env->vfp.qc[3] = 0;
|
||||
|
||||
changed ^= val;
|
||||
if (changed & (3 << 22)) {
|
||||
i = (val >> 22) & 3;
|
||||
@ -12752,31 +12883,40 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
|
||||
return float64_sqrt(a, &env->vfp.fp_status);
|
||||
}
|
||||
|
||||
static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
|
||||
{
|
||||
uint32_t flags;
|
||||
switch (cmp) {
|
||||
case float_relation_equal:
|
||||
flags = 0x6;
|
||||
break;
|
||||
case float_relation_less:
|
||||
flags = 0x8;
|
||||
break;
|
||||
case float_relation_greater:
|
||||
flags = 0x2;
|
||||
break;
|
||||
case float_relation_unordered:
|
||||
flags = 0x3;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] =
|
||||
deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
|
||||
}
|
||||
|
||||
/* XXX: check quiet/signaling case */
|
||||
#define DO_VFP_cmp(p, type) \
|
||||
void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
|
||||
{ \
|
||||
uint32_t flags; \
|
||||
switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \
|
||||
case 0: flags = 0x6; break; \
|
||||
case -1: flags = 0x8; break; \
|
||||
case 1: flags = 0x2; break; \
|
||||
default: case 2: flags = 0x3; break; \
|
||||
} \
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
|
||||
| (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
|
||||
softfloat_to_vfp_compare(env, \
|
||||
type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
|
||||
} \
|
||||
void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
|
||||
{ \
|
||||
uint32_t flags; \
|
||||
switch(type ## _compare(a, b, &env->vfp.fp_status)) { \
|
||||
case 0: flags = 0x6; break; \
|
||||
case -1: flags = 0x8; break; \
|
||||
case 1: flags = 0x2; break; \
|
||||
default: case 2: flags = 0x3; break; \
|
||||
} \
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
|
||||
| (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
|
||||
softfloat_to_vfp_compare(env, \
|
||||
type ## _compare(a, b, &env->vfp.fp_status)); \
|
||||
}
|
||||
DO_VFP_cmp(s, float32)
|
||||
DO_VFP_cmp(d, float64)
|
||||
|
@@ -276,18 +276,6 @@ DEF_HELPER_2(neon_cge_s16, i32, i32, i32)
DEF_HELPER_2(neon_cge_u32, i32, i32, i32)
DEF_HELPER_2(neon_cge_s32, i32, i32, i32)

DEF_HELPER_2(neon_min_u8, i32, i32, i32)
DEF_HELPER_2(neon_min_s8, i32, i32, i32)
DEF_HELPER_2(neon_min_u16, i32, i32, i32)
DEF_HELPER_2(neon_min_s16, i32, i32, i32)
DEF_HELPER_2(neon_min_u32, i32, i32, i32)
DEF_HELPER_2(neon_min_s32, i32, i32, i32)
DEF_HELPER_2(neon_max_u8, i32, i32, i32)
DEF_HELPER_2(neon_max_s8, i32, i32, i32)
DEF_HELPER_2(neon_max_u16, i32, i32, i32)
DEF_HELPER_2(neon_max_s16, i32, i32, i32)
DEF_HELPER_2(neon_max_u32, i32, i32, i32)
DEF_HELPER_2(neon_max_s32, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_s8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u16, i32, i32, i32)
@@ -653,6 +641,39 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)

#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
@@ -387,24 +387,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}

/* Note that we do not call write_cpustate_to_list()
 * here, so we are only writing the tuple list back to
 * KVM. This is safe because nothing can change the
 * CPUARMState cp15 fields (in particular gdb accesses cannot)
 * and so there are no changes to sync. In fact syncing would
 * be wrong at this point: for a constant register where TCG and
 * KVM disagree about its value, the preceding write_list_to_cpustate()
 * would not have had any effect on the CPUARMState value (since the
 * register is read-only), and a write_cpustate_to_list() here would
 * then try to write the TCG value back into KVM -- this would either
 * fail or incorrectly change the value the guest sees.
 *
 * If we ever want to allow the user to modify cp15 registers via
 * the gdb stub, we would need to be more clever here (for instance
 * tracking the set of registers kvm_arch_get_registers() successfully
 * managed to update the CPUARMState with, and only allowing those
 * to be written back up into the kernel).
 */
write_cpustate_to_list(cpu, true);

if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
@@ -838,6 +838,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}

write_cpustate_to_list(cpu, true);

if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
@@ -630,7 +630,7 @@ static int cpu_pre_save(void *opaque)
abort();
}
} else {
if (!write_cpustate_to_list(cpu)) {
if (!write_cpustate_to_list(cpu, false)) {
/* This should never fail. */
abort();
}
@@ -15,7 +15,7 @@
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)

#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
#define SET_QC() env->vfp.qc[0] = 1

#define NEON_TYPE1(name, type) \
typedef struct \
@@ -581,12 +581,6 @@ NEON_VOP(cge_u32, neon_u32, 1)
#undef NEON_FN

#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
NEON_VOP(min_s8, neon_s8, 4)
NEON_VOP(min_u8, neon_u8, 4)
NEON_VOP(min_s16, neon_s16, 2)
NEON_VOP(min_u16, neon_u16, 2)
NEON_VOP(min_s32, neon_s32, 1)
NEON_VOP(min_u32, neon_u32, 1)
NEON_POP(pmin_s8, neon_s8, 4)
NEON_POP(pmin_u8, neon_u8, 4)
NEON_POP(pmin_s16, neon_s16, 2)
@@ -594,12 +588,6 @@ NEON_POP(pmin_u16, neon_u16, 2)
#undef NEON_FN

#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
NEON_VOP(max_s8, neon_s8, 4)
NEON_VOP(max_u8, neon_u8, 4)
NEON_VOP(max_s16, neon_s16, 2)
NEON_VOP(max_u16, neon_u16, 2)
NEON_VOP(max_s32, neon_s32, 1)
NEON_VOP(max_u32, neon_u32, 1)
NEON_POP(pmax_s8, neon_s8, 4)
NEON_POP(pmax_u8, neon_u8, 4)
NEON_POP(pmax_s16, neon_s16, 2)
@@ -10648,11 +10648,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
return;
case 2: /* ORR */
if (rn == rm) { /* MOV */
gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
} else {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
}
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
return;
case 3: /* ORN */
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
@@ -10952,6 +10948,36 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
}

switch (opcode) {
case 0x01: /* SQADD, UQADD */
tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
offsetof(CPUARMState, vfp.qc),
vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm),
is_q ? 16 : 8, vec_full_reg_size(s),
(u ? uqadd_op : sqadd_op) + size);
return;
case 0x05: /* SQSUB, UQSUB */
tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
offsetof(CPUARMState, vfp.qc),
vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm),
is_q ? 16 : 8, vec_full_reg_size(s),
(u ? uqsub_op : sqsub_op) + size);
return;
case 0x0c: /* SMAX, UMAX */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
} else {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
}
return;
case 0x0d: /* SMIN, UMIN */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
} else {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
}
return;
case 0x10: /* ADD, SUB */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
@@ -11033,16 +11059,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
case 0x1: /* SQADD, UQADD */
{
static NeonGenTwoOpEnvFn * const fns[3][2] = {
{ gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
{ gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
{ gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
};
genenvfn = fns[size][u];
break;
}
case 0x2: /* SRHADD, URHADD */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -11063,16 +11079,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
case 0x5: /* SQSUB, UQSUB */
{
static NeonGenTwoOpEnvFn * const fns[3][2] = {
{ gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
{ gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
{ gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
};
genenvfn = fns[size][u];
break;
}
case 0x8: /* SSHL, USHL */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -11113,27 +11119,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
case 0xc: /* SMAX, UMAX */
{
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
{ tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
}

case 0xd: /* SMIN, UMIN */
{
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
{ tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
}
case 0xe: /* SABD, UABD */
case 0xf: /* SABA, UABA */
{
@@ -280,11 +280,7 @@ static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
if (a->rn == a->rm) { /* MOV */
return do_mov_z(s, a->rd, a->rn);
} else {
return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}
return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
@ -4760,10 +4760,10 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
|
||||
}
|
||||
|
||||
/* 32-bit pairwise ops end up the same as the elementwise versions. */
|
||||
#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
|
||||
#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
|
||||
#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
|
||||
#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
|
||||
#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
|
||||
#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
|
||||
#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
|
||||
#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
|
||||
|
||||
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
|
||||
switch ((size << 1) | u) { \
|
||||
@ -6148,6 +6148,142 @@ const GVecGen3 cmtst_op[4] = {
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_add_vec(vece, x, a, b);
|
||||
tcg_gen_usadd_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 uqadd_op[4] = {
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_b,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_h,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_s,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_d,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_add_vec(vece, x, a, b);
|
||||
tcg_gen_ssadd_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 sqadd_op[4] = {
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_b,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_h,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_s,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_d,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_sub_vec(vece, x, a, b);
|
||||
tcg_gen_ussub_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 uqsub_op[4] = {
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_b,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_h,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_s,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_d,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_sub_vec(vece, x, a, b);
|
||||
tcg_gen_sssub_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 sqsub_op[4] = {
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_b,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_h,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_s,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_d,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
/* Translate a NEON data processing instruction. Return nonzero if the
|
||||
instruction is invalid.
|
||||
We process data in a mixture of 32-bit and 64-bit chunks.
|
||||
@ -6294,15 +6430,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
break;
|
||||
case 2:
|
||||
if (rn == rm) {
|
||||
/* VMOV */
|
||||
tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size);
|
||||
} else {
|
||||
/* VORR */
|
||||
tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
}
|
||||
case 2: /* VORR */
|
||||
tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
break;
|
||||
case 3: /* VORN */
|
||||
tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
@ -6337,6 +6467,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
}
|
||||
return 0;
|
||||
|
||||
case NEON_3R_VQADD:
|
||||
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
|
||||
rn_ofs, rm_ofs, vec_size, vec_size,
|
||||
(u ? uqadd_op : sqadd_op) + size);
|
||||
break;
|
||||
|
||||
case NEON_3R_VQSUB:
|
||||
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
|
||||
rn_ofs, rm_ofs, vec_size, vec_size,
|
||||
(u ? uqsub_op : sqsub_op) + size);
|
||||
break;
|
||||
|
||||
case NEON_3R_VMUL: /* VMUL */
|
||||
if (u) {
|
||||
/* Polynomial case allows only P8 and is handled below. */
|
||||
@ -6374,6 +6516,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
|
||||
rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
|
||||
return 0;
|
||||
|
||||
case NEON_3R_VMAX:
|
||||
if (u) {
|
||||
tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
} else {
|
||||
tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
}
|
||||
return 0;
|
||||
case NEON_3R_VMIN:
|
||||
if (u) {
|
||||
tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
} else {
|
||||
tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size == 3) {
|
||||
@ -6382,24 +6543,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
neon_load_reg64(cpu_V0, rn + pass);
|
||||
neon_load_reg64(cpu_V1, rm + pass);
|
||||
switch (op) {
|
||||
case NEON_3R_VQADD:
|
||||
if (u) {
|
||||
gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
} else {
|
||||
gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
}
|
||||
break;
|
||||
case NEON_3R_VQSUB:
|
||||
if (u) {
|
||||
gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
} else {
|
||||
gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
}
|
||||
break;
|
||||
case NEON_3R_VSHL:
|
||||
if (u) {
|
||||
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
|
||||
@ -6515,18 +6658,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
case NEON_3R_VHADD:
|
||||
GEN_NEON_INTEGER_OP(hadd);
|
||||
break;
|
||||
case NEON_3R_VQADD:
|
||||
GEN_NEON_INTEGER_OP_ENV(qadd);
|
||||
break;
|
||||
case NEON_3R_VRHADD:
|
||||
GEN_NEON_INTEGER_OP(rhadd);
|
||||
break;
|
||||
case NEON_3R_VHSUB:
|
||||
GEN_NEON_INTEGER_OP(hsub);
|
||||
break;
|
||||
case NEON_3R_VQSUB:
|
||||
GEN_NEON_INTEGER_OP_ENV(qsub);
|
||||
break;
|
||||
case NEON_3R_VSHL:
|
||||
GEN_NEON_INTEGER_OP(shl);
|
||||
break;
|
||||
@ -6539,12 +6676,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
case NEON_3R_VQRSHL:
|
||||
GEN_NEON_INTEGER_OP_ENV(qrshl);
|
||||
break;
|
||||
case NEON_3R_VMAX:
|
||||
GEN_NEON_INTEGER_OP(max);
|
||||
break;
|
||||
case NEON_3R_VMIN:
|
||||
GEN_NEON_INTEGER_OP(min);
|
||||
break;
|
||||
case NEON_3R_VABD:
|
||||
GEN_NEON_INTEGER_OP(abd);
|
||||
break;
|
||||
@ -13634,7 +13765,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
|
||||
i * 2 + 1, (uint32_t)(v >> 32),
|
||||
i, v);
|
||||
}
|
||||
cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
|
||||
cpu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -214,6 +214,10 @@ extern const GVecGen2i ssra_op[4];
extern const GVecGen2i usra_op[4];
extern const GVecGen2i sri_op[4];
extern const GVecGen2i sli_op[4];
extern const GVecGen4 uqadd_op[4];
extern const GVecGen4 sqadd_op[4];
extern const GVecGen4 uqsub_op[4];
extern const GVecGen4 sqsub_op[4];
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);

/*
@@ -36,7 +36,7 @@
#define H4(x) (x)
#endif

#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
#define SET_QC() env->vfp.qc[0] = 1

static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
@@ -638,6 +638,7 @@ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
d[i] = FUNC(n[i], stat); \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}

DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
@@ -688,6 +689,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
d[i] = FUNC(n[i], m[i], stat); \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}

DO_3OP(gvec_fadd_h, float16_add, float16)
@@ -766,3 +768,133 @@ DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
DO_FMLA_IDX(gvec_fmla_idx_d, float64, )

#undef DO_FMLA_IDX

#define DO_SAT(NAME, WTYPE, TYPEN, TYPEM, OP, MIN, MAX) \
void HELPER(NAME)(void *vd, void *vq, void *vn, void *vm, uint32_t desc) \
{ \
intptr_t i, oprsz = simd_oprsz(desc); \
TYPEN *d = vd, *n = vn; TYPEM *m = vm; \
bool q = false; \
for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \
WTYPE dd = (WTYPE)n[i] OP m[i]; \
if (dd < MIN) { \
dd = MIN; \
q = true; \
} else if (dd > MAX) { \
dd = MAX; \
q = true; \
} \
d[i] = dd; \
} \
if (q) { \
uint32_t *qc = vq; \
qc[0] = 1; \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}

DO_SAT(gvec_uqadd_b, int, uint8_t, uint8_t, +, 0, UINT8_MAX)
DO_SAT(gvec_uqadd_h, int, uint16_t, uint16_t, +, 0, UINT16_MAX)
DO_SAT(gvec_uqadd_s, int64_t, uint32_t, uint32_t, +, 0, UINT32_MAX)

DO_SAT(gvec_sqadd_b, int, int8_t, int8_t, +, INT8_MIN, INT8_MAX)
DO_SAT(gvec_sqadd_h, int, int16_t, int16_t, +, INT16_MIN, INT16_MAX)
DO_SAT(gvec_sqadd_s, int64_t, int32_t, int32_t, +, INT32_MIN, INT32_MAX)

DO_SAT(gvec_uqsub_b, int, uint8_t, uint8_t, -, 0, UINT8_MAX)
DO_SAT(gvec_uqsub_h, int, uint16_t, uint16_t, -, 0, UINT16_MAX)
DO_SAT(gvec_uqsub_s, int64_t, uint32_t, uint32_t, -, 0, UINT32_MAX)

DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX)
DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX)
DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX)

#undef DO_SAT

void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
uint64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
uint64_t nn = n[i], mm = m[i], dd = nn + mm;
if (dd < nn) {
dd = UINT64_MAX;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}

void HELPER(gvec_uqsub_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
uint64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
uint64_t nn = n[i], mm = m[i], dd = nn - mm;
if (nn < mm) {
dd = 0;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}

void HELPER(gvec_sqadd_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
int64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
int64_t nn = n[i], mm = m[i], dd = nn + mm;
if (((dd ^ nn) & ~(nn ^ mm)) & INT64_MIN) {
dd = (nn >> 63) ^ ~INT64_MIN;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}

void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
int64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
int64_t nn = n[i], mm = m[i], dd = nn - mm;
if (((dd ^ nn) & (nn ^ mm)) & INT64_MIN) {
dd = (nn >> 63) ^ ~INT64_MIN;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}