target-arm queue:
 * gdbstub: Send a reply to the vKill packet
 * Improve codegen for neon min/max and saturating arithmetic
 * Fix a bug in clearing FPSCR exception status bits
 * hw/arm/armsse: Fix miswiring of expansion IRQs
 * hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
 * MAINTAINERS: Remove Peter Crosthwaite from various entries
 * arm: Allow system registers for KVM guests to be changed by QEMU code
 * linux-user: support HWCAP_CPUID which exposes ID registers to user code
 * Fix bug in 128-bit cmpxchg for BE Arm guests
 * Implement (no-op) HACR_EL2
 * Fix CRn to be 14 for PMEVTYPER/PMEVCNTR

-----BEGIN PGP SIGNATURE-----
iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAlxmkiIZHHBldGVyLm1h
eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3vzlD/wKPD9Hg45ieqwFHA5ovKFr
31gNuDIUZpGf8ohKCUF/SXZkq6T81eWFn2+J60UQFyAw0owGiqn24Er7LKJQNqBw
kwFGAwBUZ2Kbs4qg+RRlHYpC+IZf5MIRbLSWdy9neAyewRniekzR1Gw5+NamY4nw
rJLlSezL2cxG8nKea/HKM1gmgk3wUxL4mfbC+ZLmY87ZTYzRb38q1V4iU2MV59FD
U23+s5kJ21/LcQwWLFJE6Dr9AuGHEIsFxWvgFbhB61tR5wX2Vv19dFS12KJibII2
7IX6V+mo+aeQCp0GAMOe9yi+Xd2txgiAZ/mV0JnYykU19oEFFlDGtnptVGNU0paM
rn0KrdO2mBIuIw995yFPkUMtpU1EnrG7e7XtDGdrlXxeRwUfD+okLOUSsTTYPYiF
uPhAu6SRvGKZ1qYFjR5v2WnUsITMbH/I+oIh+unIyxm+lO/zBEybPvWpCXgDE36D
J30Ol1j/3UNcQH9SnvCTvFopH16A8nJIqYB5rJ2SclUPr+aTWdYI5Lrs+opgrN+H
LeOStYkRXibgwFX3L5ZdDEGhoyuIKLpPVlaTBRM9Y3ld5huBd02m93L4z1rrdd/g
+q0Zg4X28u3Fq1wBx/bHhkwH7+Oa9f0D0dOSctQGCHzEDYHt4dBTx2lXqJI4Etab
fzMeuoqBo33lSZMTBLXsAA==
=xqVQ
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20190215' into staging

target-arm queue:
 * gdbstub: Send a reply to the vKill packet
 * Improve codegen for neon min/max and saturating arithmetic
 * Fix a bug in clearing FPSCR exception status bits
 * hw/arm/armsse: Fix miswiring of expansion IRQs
 * hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
 * MAINTAINERS: Remove Peter Crosthwaite from various entries
 * arm: Allow system registers for KVM guests to be changed by QEMU code
 * linux-user: support HWCAP_CPUID which exposes ID registers to user code
 * Fix bug in 128-bit cmpxchg for BE Arm guests
 * Implement (no-op) HACR_EL2
 * Fix CRn to be 14 for PMEVTYPER/PMEVCNTR

# gpg: Signature made Fri 15 Feb 2019 10:19:14 GMT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [ultimate]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [ultimate]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE

* remotes/pmaydell/tags/pull-target-arm-20190215: (25 commits)
  gdbstub: Send a reply to the vKill packet.
  target/arm: Add missing clear_tail calls
  target/arm: Use vector operations for saturation
  target/arm: Split out FPSCR.QC to a vector field
  target/arm: Fix set of bits kept in xregs[ARM_VFP_FPSCR]
  target/arm: Split out flags setting from vfp compares
  target/arm: Fix arm_cpu_dump_state vs FPSCR
  target/arm: Fix vfp_gdb_get/set_reg vs FPSCR
  target/arm: Remove neon min/max helpers
  target/arm: Use tcg integer min/max primitives for neon
  target/arm: Use vector minmax expanders for aarch32
  target/arm: Use vector minmax expanders for aarch64
  target/arm: Rely on optimization within tcg_gen_gvec_or
  hw/arm/armsse: Fix miswiring of expansion IRQs
  hw/intc/armv7m_nvic: Allow byte accesses to SHPR1
  MAINTAINERS: Remove Peter Crosthwaite from various entries
  arm: Allow system registers for KVM guests to be changed by QEMU code
  linux-user/elfload: enable HWCAP_CPUID for AArch64
  target/arm: expose remaining CPUID registers as RAZ
  target/arm: expose MPIDR_EL1 to userspace
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 81dbcfa9e1
@@ -110,7 +110,6 @@ Guest CPU cores (TCG):
----------------------
Overall
L: qemu-devel@nongnu.org
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Richard Henderson <rth@twiddle.net>
R: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
@@ -1345,7 +1344,6 @@ F: tests/virtio-scsi-test.c
T: git https://github.com/bonzini/qemu.git scsi-next

SSI
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alistair Francis <alistair@alistair23.me>
S: Maintained
F: hw/ssi/*
@@ -1356,7 +1354,6 @@ F: tests/m25p80-test.c

Xilinx SPI
M: Alistair Francis <alistair@alistair23.me>
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
S: Maintained
F: hw/ssi/xilinx_*

@@ -1766,7 +1763,6 @@ F: qom/cpu.c
F: include/qom/cpu.h

Device Tree
M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: device_tree.c
@@ -1361,6 +1361,7 @@ static int gdb_handle_packet(GDBState *s, const char *line_buf)
break;
} else if (strncmp(p, "Kill;", 5) == 0) {
/* Kill the target */
put_packet(s, "OK");
error_report("QEMU: Terminated via GDBstub");
exit(0);
} else {
@@ -565,7 +565,7 @@ static void armsse_realize(DeviceState *dev, Error **errp)
/* Connect EXP_IRQ/EXP_CPUn_IRQ GPIOs to the NVIC's lines 32 and up */
s->exp_irqs[i] = g_new(qemu_irq, s->exp_numirq);
for (j = 0; j < s->exp_numirq; j++) {
s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, i + 32);
s->exp_irqs[i][j] = qdev_get_gpio_in(cpudev, j + 32);
}
if (i == 0) {
gpioname = g_strdup("EXP_IRQ");
@@ -1841,7 +1841,7 @@ static MemTxResult nvic_sysreg_read(void *opaque, hwaddr addr,
}
}
break;
case 0xd18: /* System Handler Priority (SHPR1) */
case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
val = 0;
break;
@@ -1956,7 +1956,7 @@ static MemTxResult nvic_sysreg_write(void *opaque, hwaddr addr,
}
nvic_irq_update(s);
return MEMTX_OK;
case 0xd18: /* System Handler Priority (SHPR1) */
case 0xd18 ... 0xd1b: /* System Handler Priority (SHPR1) */
if (!arm_feature(&s->cpu->env, ARM_FEATURE_M_MAIN)) {
return MEMTX_OK;
}
@@ -580,6 +580,7 @@ static uint32_t get_elf_hwcap(void)

hwcaps |= ARM_HWCAP_A64_FP;
hwcaps |= ARM_HWCAP_A64_ASIMD;
hwcaps |= ARM_HWCAP_A64_CPUID;

/* probe for the extra features */
#define GET_FEATURE_ID(feat, hwcap) \
@@ -577,11 +577,13 @@ typedef struct CPUARMState {
ARMPredicateReg preg_tmp;
#endif

uint32_t xregs[16];
/* We store these fpcsr fields separately for convenience. */
uint32_t qc[4] QEMU_ALIGNED(16);
int vec_len;
int vec_stride;

uint32_t xregs[16];

/* Scratch space for aa32 neon expansion. */
uint32_t scratch[8];

@@ -1427,6 +1429,7 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
#define FPCR_FZ16 (1 << 19) /* ARMv8.2+, FP16 flush-to-zero */
#define FPCR_FZ (1 << 24) /* Flush-to-zero enable bit */
#define FPCR_DN (1 << 25) /* Default NaN enable bit */
#define FPCR_QC (1 << 27) /* Cumulative saturation bit */

static inline uint32_t vfp_get_fpsr(CPUARMState *env)
{
@@ -2226,6 +2229,18 @@ static inline bool cptype_valid(int cptype)
#define PL0_R (0x02 | PL1_R)
#define PL0_W (0x01 | PL1_W)

/*
 * For user-mode some registers are accessible to EL0 via a kernel
 * trap-and-emulate ABI. In this case we define the read permissions
 * as actually being PL0_R. However some bits of any given register
 * may still be masked.
 */
#ifdef CONFIG_USER_ONLY
#define PL0U_R PL0_R
#else
#define PL0U_R PL1_R
#endif

#define PL3_RW (PL3_R | PL3_W)
#define PL2_RW (PL2_R | PL2_W)
#define PL1_RW (PL1_R | PL1_W)
@@ -2452,6 +2467,30 @@ static inline void define_one_arm_cp_reg(ARMCPU *cpu, const ARMCPRegInfo *regs)
}
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp);

/*
 * Definition of an ARM co-processor register as viewed from
 * userspace. This is used for presenting sanitised versions of
 * registers to userspace when emulating the Linux AArch64 CPU
 * ID/feature ABI (advertised as HWCAP_CPUID).
 */
typedef struct ARMCPRegUserSpaceInfo {
/* Name of register */
const char *name;

/* Is the name actually a glob pattern */
bool is_glob;

/* Only some bits are exported to user space */
uint64_t exported_bits;

/* Fixed bits are applied after the mask */
uint64_t fixed_bits;
} ARMCPRegUserSpaceInfo;

#define REGUSERINFO_SENTINEL { .name = NULL }

void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods);

/* CPWriteFn that can be used to implement writes-ignored behaviour */
void arm_cp_write_ignore(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value);
@@ -2499,18 +2538,25 @@ bool write_list_to_cpustate(ARMCPU *cpu);
/**
 * write_cpustate_to_list:
 * @cpu: ARMCPU
 * @kvm_sync: true if this is for syncing back to KVM
 *
 * For each register listed in the ARMCPU cpreg_indexes list, write
 * its value from the ARMCPUState structure into the cpreg_values list.
 * This is used to copy info from TCG's working data structures into
 * KVM or for outbound migration.
 *
 * @kvm_sync is true if we are doing this in order to sync the
 * register state back to KVM. In this case we will only update
 * values in the list if the previous list->cpustate sync actually
 * successfully wrote the CPU state. Otherwise we will keep the value
 * that is in the list.
 *
 * Returns: true if all register values were read correctly,
 * false if some register was unknown or could not be read.
 * Note that we do not stop early on failure -- we will attempt
 * reading all registers in the list.
 */
bool write_cpustate_to_list(ARMCPU *cpu);
bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync);

#define ARM_CPUID_TI915T 0x54029152
#define ARM_CPUID_TI925T 0x54029252
@@ -583,8 +583,8 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
 * High and low need to be switched here because this is not actually a
 * 128bit store but two doublewords stored consecutively
 */
Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
Int128 newv = int128_make128(new_lo, new_hi);
Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
Int128 newv = int128_make128(new_hi, new_lo);
Int128 oldv;
uintptr_t ra = GETPC();
uint64_t o0, o1;
@ -81,7 +81,7 @@ static int vfp_gdb_get_reg(CPUARMState *env, uint8_t *buf, int reg)
|
||||
}
|
||||
switch (reg - nregs) {
|
||||
case 0: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSID]); return 4;
|
||||
case 1: stl_p(buf, env->vfp.xregs[ARM_VFP_FPSCR]); return 4;
|
||||
case 1: stl_p(buf, vfp_get_fpscr(env)); return 4;
|
||||
case 2: stl_p(buf, env->vfp.xregs[ARM_VFP_FPEXC]); return 4;
|
||||
}
|
||||
return 0;
|
||||
@ -107,7 +107,7 @@ static int vfp_gdb_set_reg(CPUARMState *env, uint8_t *buf, int reg)
|
||||
}
|
||||
switch (reg - nregs) {
|
||||
case 0: env->vfp.xregs[ARM_VFP_FPSID] = ldl_p(buf); return 4;
|
||||
case 1: env->vfp.xregs[ARM_VFP_FPSCR] = ldl_p(buf); return 4;
|
||||
case 1: vfp_set_fpscr(env, ldl_p(buf)); return 4;
|
||||
case 2: env->vfp.xregs[ARM_VFP_FPEXC] = ldl_p(buf) & (1 << 30); return 4;
|
||||
}
|
||||
return 0;
|
||||
@ -264,7 +264,7 @@ static bool raw_accessors_invalid(const ARMCPRegInfo *ri)
|
||||
return true;
|
||||
}
|
||||
|
||||
bool write_cpustate_to_list(ARMCPU *cpu)
|
||||
bool write_cpustate_to_list(ARMCPU *cpu, bool kvm_sync)
|
||||
{
|
||||
/* Write the coprocessor state from cpu->env to the (index,value) list. */
|
||||
int i;
|
||||
@ -273,6 +273,7 @@ bool write_cpustate_to_list(ARMCPU *cpu)
|
||||
for (i = 0; i < cpu->cpreg_array_len; i++) {
|
||||
uint32_t regidx = kvm_to_cpreg_id(cpu->cpreg_indexes[i]);
|
||||
const ARMCPRegInfo *ri;
|
||||
uint64_t newval;
|
||||
|
||||
ri = get_arm_cp_reginfo(cpu->cp_regs, regidx);
|
||||
if (!ri) {
|
||||
@ -282,7 +283,29 @@ bool write_cpustate_to_list(ARMCPU *cpu)
|
||||
if (ri->type & ARM_CP_NO_RAW) {
|
||||
continue;
|
||||
}
|
||||
cpu->cpreg_values[i] = read_raw_cp_reg(&cpu->env, ri);
|
||||
|
||||
newval = read_raw_cp_reg(&cpu->env, ri);
|
||||
if (kvm_sync) {
|
||||
/*
|
||||
* Only sync if the previous list->cpustate sync succeeded.
|
||||
* Rather than tracking the success/failure state for every
|
||||
* item in the list, we just recheck "does the raw write we must
|
||||
* have made in write_list_to_cpustate() read back OK" here.
|
||||
*/
|
||||
uint64_t oldval = cpu->cpreg_values[i];
|
||||
|
||||
if (oldval == newval) {
|
||||
continue;
|
||||
}
|
||||
|
||||
write_raw_cp_reg(&cpu->env, ri, oldval);
|
||||
if (read_raw_cp_reg(&cpu->env, ri) != oldval) {
|
||||
continue;
|
||||
}
|
||||
|
||||
write_raw_cp_reg(&cpu->env, ri, newval);
|
||||
}
|
||||
cpu->cpreg_values[i] = newval;
|
||||
}
|
||||
return ok;
|
||||
}
|
||||
@ -3657,13 +3680,6 @@ static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
|
||||
return mpidr_read_val(env);
|
||||
}
|
||||
|
||||
static const ARMCPRegInfo mpidr_cp_reginfo[] = {
|
||||
{ .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
|
||||
.access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
|
||||
static const ARMCPRegInfo lpae_cp_reginfo[] = {
|
||||
/* NOP AMAIR0/1 */
|
||||
{ .name = "AMAIR0", .state = ARM_CP_STATE_BOTH,
|
||||
@ -4434,6 +4450,9 @@ static const ARMCPRegInfo el3_no_el2_cp_reginfo[] = {
|
||||
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
|
||||
.access = PL2_RW,
|
||||
.type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
|
||||
.access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "ESR_EL2", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 5, .crm = 2, .opc2 = 0,
|
||||
.access = PL2_RW,
|
||||
@ -4666,6 +4685,9 @@ static const ARMCPRegInfo el2_cp_reginfo[] = {
|
||||
.cp = 15, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 0,
|
||||
.access = PL2_RW, .fieldoffset = offsetof(CPUARMState, cp15.hcr_el2),
|
||||
.writefn = hcr_writelow },
|
||||
{ .name = "HACR_EL2", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 1, .crm = 1, .opc2 = 7,
|
||||
.access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
|
||||
{ .name = "ELR_EL2", .state = ARM_CP_STATE_AA64,
|
||||
.type = ARM_CP_ALIAS,
|
||||
.opc0 = 3, .opc1 = 4, .crn = 4, .crm = 0, .opc2 = 1,
|
||||
@ -5855,25 +5877,25 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
char *pmevtyper_name = g_strdup_printf("PMEVTYPER%d", i);
|
||||
char *pmevtyper_el0_name = g_strdup_printf("PMEVTYPER%d_EL0", i);
|
||||
ARMCPRegInfo pmev_regs[] = {
|
||||
{ .name = pmevcntr_name, .cp = 15, .crn = 15,
|
||||
{ .name = pmevcntr_name, .cp = 15, .crn = 14,
|
||||
.crm = 8 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
|
||||
.access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
|
||||
.readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
|
||||
.accessfn = pmreg_access },
|
||||
{ .name = pmevcntr_el0_name, .state = ARM_CP_STATE_AA64,
|
||||
.opc0 = 3, .opc1 = 3, .crn = 15, .crm = 8 | (3 & (i >> 3)),
|
||||
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 8 | (3 & (i >> 3)),
|
||||
.opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
|
||||
.type = ARM_CP_IO,
|
||||
.readfn = pmevcntr_readfn, .writefn = pmevcntr_writefn,
|
||||
.raw_readfn = pmevcntr_rawread,
|
||||
.raw_writefn = pmevcntr_rawwrite },
|
||||
{ .name = pmevtyper_name, .cp = 15, .crn = 15,
|
||||
{ .name = pmevtyper_name, .cp = 15, .crn = 14,
|
||||
.crm = 12 | (3 & (i >> 3)), .opc1 = 0, .opc2 = i & 7,
|
||||
.access = PL0_RW, .type = ARM_CP_IO | ARM_CP_ALIAS,
|
||||
.readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
|
||||
.accessfn = pmreg_access },
|
||||
{ .name = pmevtyper_el0_name, .state = ARM_CP_STATE_AA64,
|
||||
.opc0 = 3, .opc1 = 3, .crn = 15, .crm = 12 | (3 & (i >> 3)),
|
||||
.opc0 = 3, .opc1 = 3, .crn = 14, .crm = 12 | (3 & (i >> 3)),
|
||||
.opc2 = i & 7, .access = PL0_RW, .accessfn = pmreg_access,
|
||||
.type = ARM_CP_IO,
|
||||
.readfn = pmevtyper_readfn, .writefn = pmevtyper_writefn,
|
||||
@ -6103,6 +6125,38 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
.resetvalue = cpu->pmceid1 },
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
ARMCPRegUserSpaceInfo v8_user_idregs[] = {
|
||||
{ .name = "ID_AA64PFR0_EL1",
|
||||
.exported_bits = 0x000f000f00ff0000,
|
||||
.fixed_bits = 0x0000000000000011 },
|
||||
{ .name = "ID_AA64PFR1_EL1",
|
||||
.exported_bits = 0x00000000000000f0 },
|
||||
{ .name = "ID_AA64PFR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64ZFR0_EL1" },
|
||||
{ .name = "ID_AA64MMFR0_EL1",
|
||||
.fixed_bits = 0x00000000ff000000 },
|
||||
{ .name = "ID_AA64MMFR1_EL1" },
|
||||
{ .name = "ID_AA64MMFR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64DFR0_EL1",
|
||||
.fixed_bits = 0x0000000000000006 },
|
||||
{ .name = "ID_AA64DFR1_EL1" },
|
||||
{ .name = "ID_AA64DFR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64AFR*",
|
||||
.is_glob = true },
|
||||
{ .name = "ID_AA64ISAR0_EL1",
|
||||
.exported_bits = 0x00fffffff0fffff0 },
|
||||
{ .name = "ID_AA64ISAR1_EL1",
|
||||
.exported_bits = 0x000000f0ffffffff },
|
||||
{ .name = "ID_AA64ISAR*_EL1_RESERVED",
|
||||
.is_glob = true },
|
||||
REGUSERINFO_SENTINEL
|
||||
};
|
||||
modify_arm_cp_regs(v8_idregs, v8_user_idregs);
|
||||
#endif
|
||||
/* RVBAR_EL1 is only implemented if EL1 is the highest EL */
|
||||
if (!arm_feature(env, ARM_FEATURE_EL3) &&
|
||||
!arm_feature(env, ARM_FEATURE_EL2)) {
|
||||
@ -6379,6 +6433,15 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
.opc1 = CP_ANY, .opc2 = CP_ANY, .access = PL1_W,
|
||||
.type = ARM_CP_NOP | ARM_CP_OVERRIDE
|
||||
};
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
ARMCPRegUserSpaceInfo id_v8_user_midr_cp_reginfo[] = {
|
||||
{ .name = "MIDR_EL1",
|
||||
.exported_bits = 0x00000000ffffffff },
|
||||
{ .name = "REVIDR_EL1" },
|
||||
REGUSERINFO_SENTINEL
|
||||
};
|
||||
modify_arm_cp_regs(id_v8_midr_cp_reginfo, id_v8_user_midr_cp_reginfo);
|
||||
#endif
|
||||
if (arm_feature(env, ARM_FEATURE_OMAPCP) ||
|
||||
arm_feature(env, ARM_FEATURE_STRONGARM)) {
|
||||
ARMCPRegInfo *r;
|
||||
@ -6412,6 +6475,20 @@ void register_cp_regs_for_features(ARMCPU *cpu)
|
||||
}
|
||||
|
||||
if (arm_feature(env, ARM_FEATURE_MPIDR)) {
|
||||
ARMCPRegInfo mpidr_cp_reginfo[] = {
|
||||
{ .name = "MPIDR_EL1", .state = ARM_CP_STATE_BOTH,
|
||||
.opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
|
||||
.access = PL1_R, .readfn = mpidr_read, .type = ARM_CP_NO_RAW },
|
||||
REGINFO_SENTINEL
|
||||
};
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
ARMCPRegUserSpaceInfo mpidr_user_cp_reginfo[] = {
|
||||
{ .name = "MPIDR_EL1",
|
||||
.fixed_bits = 0x0000000080000000 },
|
||||
REGUSERINFO_SENTINEL
|
||||
};
|
||||
modify_arm_cp_regs(mpidr_cp_reginfo, mpidr_user_cp_reginfo);
|
||||
#endif
|
||||
define_arm_cp_regs(cpu, mpidr_cp_reginfo);
|
||||
}
|
||||
|
||||
@ -6851,7 +6928,11 @@ void define_one_arm_cp_reg_with_opaque(ARMCPU *cpu,
|
||||
if (r->state != ARM_CP_STATE_AA32) {
|
||||
int mask = 0;
|
||||
switch (r->opc1) {
|
||||
case 0: case 1: case 2:
|
||||
case 0:
|
||||
/* min_EL EL1, but some accessible to EL0 via kernel ABI */
|
||||
mask = PL0U_R | PL1_RW;
|
||||
break;
|
||||
case 1: case 2:
|
||||
/* min_EL EL1 */
|
||||
mask = PL1_RW;
|
||||
break;
|
||||
@ -6956,6 +7037,44 @@ void define_arm_cp_regs_with_opaque(ARMCPU *cpu,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Modify ARMCPRegInfo for access from userspace.
|
||||
*
|
||||
* This is a data driven modification directed by
|
||||
* ARMCPRegUserSpaceInfo. All registers become ARM_CP_CONST as
|
||||
* user-space cannot alter any values and dynamic values pertaining to
|
||||
* execution state are hidden from user space view anyway.
|
||||
*/
|
||||
void modify_arm_cp_regs(ARMCPRegInfo *regs, const ARMCPRegUserSpaceInfo *mods)
|
||||
{
|
||||
const ARMCPRegUserSpaceInfo *m;
|
||||
ARMCPRegInfo *r;
|
||||
|
||||
for (m = mods; m->name; m++) {
|
||||
GPatternSpec *pat = NULL;
|
||||
if (m->is_glob) {
|
||||
pat = g_pattern_spec_new(m->name);
|
||||
}
|
||||
for (r = regs; r->type != ARM_CP_SENTINEL; r++) {
|
||||
if (pat && g_pattern_match_string(pat, r->name)) {
|
||||
r->type = ARM_CP_CONST;
|
||||
r->access = PL0U_R;
|
||||
r->resetvalue = 0;
|
||||
/* continue */
|
||||
} else if (strcmp(r->name, m->name) == 0) {
|
||||
r->type = ARM_CP_CONST;
|
||||
r->access = PL0U_R;
|
||||
r->resetvalue &= m->exported_bits;
|
||||
r->resetvalue |= m->fixed_bits;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (pat) {
|
||||
g_pattern_spec_free(pat);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ARMCPRegInfo *get_arm_cp_reginfo(GHashTable *cpregs, uint32_t encoded_cp)
|
||||
{
|
||||
return g_hash_table_lookup(cpregs, &encoded_cp);
|
||||
@ -12585,10 +12704,9 @@ static inline int vfp_exceptbits_from_host(int host_bits)
|
||||
|
||||
uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
|
||||
{
|
||||
int i;
|
||||
uint32_t fpscr;
|
||||
uint32_t i, fpscr;
|
||||
|
||||
fpscr = (env->vfp.xregs[ARM_VFP_FPSCR] & 0xffc8ffff)
|
||||
fpscr = env->vfp.xregs[ARM_VFP_FPSCR]
|
||||
| (env->vfp.vec_len << 16)
|
||||
| (env->vfp.vec_stride << 20);
|
||||
|
||||
@ -12597,8 +12715,11 @@ uint32_t HELPER(vfp_get_fpscr)(CPUARMState *env)
|
||||
/* FZ16 does not generate an input denormal exception. */
|
||||
i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
|
||||
& ~float_flag_input_denormal);
|
||||
|
||||
fpscr |= vfp_exceptbits_from_host(i);
|
||||
|
||||
i = env->vfp.qc[0] | env->vfp.qc[1] | env->vfp.qc[2] | env->vfp.qc[3];
|
||||
fpscr |= i ? FPCR_QC : 0;
|
||||
|
||||
return fpscr;
|
||||
}
|
||||
|
||||
@ -12630,7 +12751,7 @@ static inline int vfp_exceptbits_to_host(int target_bits)
|
||||
void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
|
||||
{
|
||||
int i;
|
||||
uint32_t changed;
|
||||
uint32_t changed = env->vfp.xregs[ARM_VFP_FPSCR];
|
||||
|
||||
/* When ARMv8.2-FP16 is not supported, FZ16 is RES0. */
|
||||
if (!cpu_isar_feature(aa64_fp16, arm_env_get_cpu(env))) {
|
||||
@ -12639,15 +12760,25 @@ void HELPER(vfp_set_fpscr)(CPUARMState *env, uint32_t val)
|
||||
|
||||
/*
|
||||
* We don't implement trapped exception handling, so the
|
||||
* trap enable bits are all RAZ/WI (not RES0!)
|
||||
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
|
||||
*
|
||||
* If we exclude the exception flags, IOC|DZC|OFC|UFC|IXC|IDC
|
||||
* (which are stored in fp_status), and the other RES0 bits
|
||||
* in between, then we clear all of the low 16 bits.
|
||||
*/
|
||||
val &= ~(FPCR_IDE | FPCR_IXE | FPCR_UFE | FPCR_OFE | FPCR_DZE | FPCR_IOE);
|
||||
|
||||
changed = env->vfp.xregs[ARM_VFP_FPSCR];
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = (val & 0xffc8ffff);
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = val & 0xf7c80000;
|
||||
env->vfp.vec_len = (val >> 16) & 7;
|
||||
env->vfp.vec_stride = (val >> 20) & 3;
|
||||
|
||||
/*
|
||||
* The bit we set within fpscr_q is arbitrary; the register as a
|
||||
* whole being zero/non-zero is what counts.
|
||||
*/
|
||||
env->vfp.qc[0] = val & FPCR_QC;
|
||||
env->vfp.qc[1] = 0;
|
||||
env->vfp.qc[2] = 0;
|
||||
env->vfp.qc[3] = 0;
|
||||
|
||||
changed ^= val;
|
||||
if (changed & (3 << 22)) {
|
||||
i = (val >> 22) & 3;
|
||||
@ -12752,31 +12883,40 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
|
||||
return float64_sqrt(a, &env->vfp.fp_status);
|
||||
}
|
||||
|
||||
static void softfloat_to_vfp_compare(CPUARMState *env, int cmp)
|
||||
{
|
||||
uint32_t flags;
|
||||
switch (cmp) {
|
||||
case float_relation_equal:
|
||||
flags = 0x6;
|
||||
break;
|
||||
case float_relation_less:
|
||||
flags = 0x8;
|
||||
break;
|
||||
case float_relation_greater:
|
||||
flags = 0x2;
|
||||
break;
|
||||
case float_relation_unordered:
|
||||
flags = 0x3;
|
||||
break;
|
||||
default:
|
||||
g_assert_not_reached();
|
||||
}
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] =
|
||||
deposit32(env->vfp.xregs[ARM_VFP_FPSCR], 28, 4, flags);
|
||||
}
|
||||
|
||||
/* XXX: check quiet/signaling case */
|
||||
#define DO_VFP_cmp(p, type) \
|
||||
void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
|
||||
{ \
|
||||
uint32_t flags; \
|
||||
switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \
|
||||
case 0: flags = 0x6; break; \
|
||||
case -1: flags = 0x8; break; \
|
||||
case 1: flags = 0x2; break; \
|
||||
default: case 2: flags = 0x3; break; \
|
||||
} \
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
|
||||
| (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
|
||||
softfloat_to_vfp_compare(env, \
|
||||
type ## _compare_quiet(a, b, &env->vfp.fp_status)); \
|
||||
} \
|
||||
void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
|
||||
{ \
|
||||
uint32_t flags; \
|
||||
switch(type ## _compare(a, b, &env->vfp.fp_status)) { \
|
||||
case 0: flags = 0x6; break; \
|
||||
case -1: flags = 0x8; break; \
|
||||
case 1: flags = 0x2; break; \
|
||||
default: case 2: flags = 0x3; break; \
|
||||
} \
|
||||
env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
|
||||
| (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
|
||||
softfloat_to_vfp_compare(env, \
|
||||
type ## _compare(a, b, &env->vfp.fp_status)); \
|
||||
}
|
||||
DO_VFP_cmp(s, float32)
|
||||
DO_VFP_cmp(d, float64)
|
||||
|
@@ -276,18 +276,6 @@ DEF_HELPER_2(neon_cge_s16, i32, i32, i32)
DEF_HELPER_2(neon_cge_u32, i32, i32, i32)
DEF_HELPER_2(neon_cge_s32, i32, i32, i32)

DEF_HELPER_2(neon_min_u8, i32, i32, i32)
DEF_HELPER_2(neon_min_s8, i32, i32, i32)
DEF_HELPER_2(neon_min_u16, i32, i32, i32)
DEF_HELPER_2(neon_min_s16, i32, i32, i32)
DEF_HELPER_2(neon_min_u32, i32, i32, i32)
DEF_HELPER_2(neon_min_s32, i32, i32, i32)
DEF_HELPER_2(neon_max_u8, i32, i32, i32)
DEF_HELPER_2(neon_max_s8, i32, i32, i32)
DEF_HELPER_2(neon_max_u16, i32, i32, i32)
DEF_HELPER_2(neon_max_s16, i32, i32, i32)
DEF_HELPER_2(neon_max_u32, i32, i32, i32)
DEF_HELPER_2(neon_max_s32, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_s8, i32, i32, i32)
DEF_HELPER_2(neon_pmin_u16, i32, i32, i32)
@@ -653,6 +641,39 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)

DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqadd_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqsub_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_sqsub_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)

#ifdef TARGET_AARCH64
#include "helper-a64.h"
#include "helper-sve.h"
@@ -387,24 +387,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}

/* Note that we do not call write_cpustate_to_list()
 * here, so we are only writing the tuple list back to
 * KVM. This is safe because nothing can change the
 * CPUARMState cp15 fields (in particular gdb accesses cannot)
 * and so there are no changes to sync. In fact syncing would
 * be wrong at this point: for a constant register where TCG and
 * KVM disagree about its value, the preceding write_list_to_cpustate()
 * would not have had any effect on the CPUARMState value (since the
 * register is read-only), and a write_cpustate_to_list() here would
 * then try to write the TCG value back into KVM -- this would either
 * fail or incorrectly change the value the guest sees.
 *
 * If we ever want to allow the user to modify cp15 registers via
 * the gdb stub, we would need to be more clever here (for instance
 * tracking the set of registers kvm_arch_get_registers() successfully
 * managed to update the CPUARMState with, and only allowing those
 * to be written back up into the kernel).
 */
write_cpustate_to_list(cpu, true);

if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
@@ -838,6 +838,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
return ret;
}

write_cpustate_to_list(cpu, true);

if (!write_list_to_kvmstate(cpu, level)) {
return EINVAL;
}
@@ -630,7 +630,7 @@ static int cpu_pre_save(void *opaque)
abort();
}
} else {
if (!write_cpustate_to_list(cpu)) {
if (!write_cpustate_to_list(cpu, false)) {
/* This should never fail. */
abort();
}
@@ -15,7 +15,7 @@
#define SIGNBIT (uint32_t)0x80000000
#define SIGNBIT64 ((uint64_t)1 << 63)

#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
#define SET_QC() env->vfp.qc[0] = 1

#define NEON_TYPE1(name, type) \
typedef struct \
@@ -581,12 +581,6 @@ NEON_VOP(cge_u32, neon_u32, 1)
#undef NEON_FN

#define NEON_FN(dest, src1, src2) dest = (src1 < src2) ? src1 : src2
NEON_VOP(min_s8, neon_s8, 4)
NEON_VOP(min_u8, neon_u8, 4)
NEON_VOP(min_s16, neon_s16, 2)
NEON_VOP(min_u16, neon_u16, 2)
NEON_VOP(min_s32, neon_s32, 1)
NEON_VOP(min_u32, neon_u32, 1)
NEON_POP(pmin_s8, neon_s8, 4)
NEON_POP(pmin_u8, neon_u8, 4)
NEON_POP(pmin_s16, neon_s16, 2)
@@ -594,12 +588,6 @@ NEON_POP(pmin_u16, neon_u16, 2)
#undef NEON_FN

#define NEON_FN(dest, src1, src2) dest = (src1 > src2) ? src1 : src2
NEON_VOP(max_s8, neon_s8, 4)
NEON_VOP(max_u8, neon_u8, 4)
NEON_VOP(max_s16, neon_s16, 2)
NEON_VOP(max_u16, neon_u16, 2)
NEON_VOP(max_s32, neon_s32, 1)
NEON_VOP(max_u32, neon_u32, 1)
NEON_POP(pmax_s8, neon_s8, 4)
NEON_POP(pmax_u8, neon_u8, 4)
NEON_POP(pmax_s16, neon_s16, 2)
@@ -10648,11 +10648,7 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
return;
case 2: /* ORR */
if (rn == rm) { /* MOV */
gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
} else {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
}
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
return;
case 3: /* ORN */
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
@@ -10952,6 +10948,36 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
}

switch (opcode) {
case 0x01: /* SQADD, UQADD */
tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
offsetof(CPUARMState, vfp.qc),
vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm),
is_q ? 16 : 8, vec_full_reg_size(s),
(u ? uqadd_op : sqadd_op) + size);
return;
case 0x05: /* SQSUB, UQSUB */
tcg_gen_gvec_4(vec_full_reg_offset(s, rd),
offsetof(CPUARMState, vfp.qc),
vec_full_reg_offset(s, rn),
vec_full_reg_offset(s, rm),
is_q ? 16 : 8, vec_full_reg_size(s),
(u ? uqsub_op : sqsub_op) + size);
return;
case 0x0c: /* SMAX, UMAX */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umax, size);
} else {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smax, size);
}
return;
case 0x0d: /* SMIN, UMIN */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_umin, size);
} else {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_smin, size);
}
return;
case 0x10: /* ADD, SUB */
if (u) {
gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
@@ -11033,16 +11059,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
case 0x1: /* SQADD, UQADD */
{
static NeonGenTwoOpEnvFn * const fns[3][2] = {
{ gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
{ gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
{ gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
};
genenvfn = fns[size][u];
break;
}
case 0x2: /* SRHADD, URHADD */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -11063,16 +11079,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genfn = fns[size][u];
break;
}
case 0x5: /* SQSUB, UQSUB */
{
static NeonGenTwoOpEnvFn * const fns[3][2] = {
{ gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
{ gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
{ gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
};
genenvfn = fns[size][u];
break;
}
case 0x8: /* SSHL, USHL */
{
static NeonGenTwoOpFn * const fns[3][2] = {
@@ -11113,27 +11119,6 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
genenvfn = fns[size][u];
break;
}
case 0xc: /* SMAX, UMAX */
{
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
{ gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
{ tcg_gen_smax_i32, tcg_gen_umax_i32 },
};
genfn = fns[size][u];
break;
}

case 0xd: /* SMIN, UMIN */
{
static NeonGenTwoOpFn * const fns[3][2] = {
{ gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
{ gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
{ tcg_gen_smin_i32, tcg_gen_umin_i32 },
};
genfn = fns[size][u];
break;
}
case 0xe: /* SABD, UABD */
case 0xf: /* SABA, UABA */
{
@@ -280,11 +280,7 @@ static bool trans_AND_zzz(DisasContext *s, arg_rrr_esz *a)

static bool trans_ORR_zzz(DisasContext *s, arg_rrr_esz *a)
{
if (a->rn == a->rm) { /* MOV */
return do_mov_z(s, a->rd, a->rn);
} else {
return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}
return do_vector3_z(s, tcg_gen_gvec_or, 0, a->rd, a->rn, a->rm);
}

static bool trans_EOR_zzz(DisasContext *s, arg_rrr_esz *a)
@ -4760,10 +4760,10 @@ static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
|
||||
}
|
||||
|
||||
/* 32-bit pairwise ops end up the same as the elementwise versions. */
|
||||
#define gen_helper_neon_pmax_s32 gen_helper_neon_max_s32
|
||||
#define gen_helper_neon_pmax_u32 gen_helper_neon_max_u32
|
||||
#define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32
|
||||
#define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32
|
||||
#define gen_helper_neon_pmax_s32 tcg_gen_smax_i32
|
||||
#define gen_helper_neon_pmax_u32 tcg_gen_umax_i32
|
||||
#define gen_helper_neon_pmin_s32 tcg_gen_smin_i32
|
||||
#define gen_helper_neon_pmin_u32 tcg_gen_umin_i32
|
||||
|
||||
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
|
||||
switch ((size << 1) | u) { \
|
||||
@ -6148,6 +6148,142 @@ const GVecGen3 cmtst_op[4] = {
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_add_vec(vece, x, a, b);
|
||||
tcg_gen_usadd_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 uqadd_op[4] = {
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_b,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_h,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_s,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_uqadd_vec,
|
||||
.fno = gen_helper_gvec_uqadd_d,
|
||||
.opc = INDEX_op_usadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_add_vec(vece, x, a, b);
|
||||
tcg_gen_ssadd_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 sqadd_op[4] = {
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_b,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_h,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_s,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_sqadd_vec,
|
||||
.fno = gen_helper_gvec_sqadd_d,
|
||||
.opc = INDEX_op_ssadd_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_sub_vec(vece, x, a, b);
|
||||
tcg_gen_ussub_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 uqsub_op[4] = {
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_b,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_h,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_s,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_uqsub_vec,
|
||||
.fno = gen_helper_gvec_uqsub_d,
|
||||
.opc = INDEX_op_ussub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
|
||||
TCGv_vec a, TCGv_vec b)
|
||||
{
|
||||
TCGv_vec x = tcg_temp_new_vec_matching(t);
|
||||
tcg_gen_sub_vec(vece, x, a, b);
|
||||
tcg_gen_sssub_vec(vece, t, a, b);
|
||||
tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
|
||||
tcg_gen_or_vec(vece, sat, sat, x);
|
||||
tcg_temp_free_vec(x);
|
||||
}
|
||||
|
||||
const GVecGen4 sqsub_op[4] = {
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_b,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_8 },
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_h,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_16 },
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_s,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_32 },
|
||||
{ .fniv = gen_sqsub_vec,
|
||||
.fno = gen_helper_gvec_sqsub_d,
|
||||
.opc = INDEX_op_sssub_vec,
|
||||
.write_aofs = true,
|
||||
.vece = MO_64 },
|
||||
};
|
||||
|
||||
/* Translate a NEON data processing instruction. Return nonzero if the
|
||||
instruction is invalid.
|
||||
We process data in a mixture of 32-bit and 64-bit chunks.
|
||||
@ -6294,15 +6430,9 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
tcg_gen_gvec_andc(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
break;
|
||||
case 2:
|
||||
if (rn == rm) {
|
||||
/* VMOV */
|
||||
tcg_gen_gvec_mov(0, rd_ofs, rn_ofs, vec_size, vec_size);
|
||||
} else {
|
||||
/* VORR */
|
||||
tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
}
|
||||
case 2: /* VORR */
|
||||
tcg_gen_gvec_or(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
break;
|
||||
case 3: /* VORN */
|
||||
tcg_gen_gvec_orc(0, rd_ofs, rn_ofs, rm_ofs,
|
||||
@ -6337,6 +6467,18 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
}
|
||||
return 0;
|
||||
|
||||
case NEON_3R_VQADD:
|
||||
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
|
||||
rn_ofs, rm_ofs, vec_size, vec_size,
|
||||
(u ? uqadd_op : sqadd_op) + size);
|
||||
break;
|
||||
|
||||
case NEON_3R_VQSUB:
|
||||
tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
|
||||
rn_ofs, rm_ofs, vec_size, vec_size,
|
||||
(u ? uqsub_op : sqsub_op) + size);
|
||||
break;
|
||||
|
||||
case NEON_3R_VMUL: /* VMUL */
|
||||
if (u) {
|
||||
/* Polynomial case allows only P8 and is handled below. */
|
||||
@ -6374,6 +6516,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
|
||||
rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
|
||||
return 0;
|
||||
|
||||
case NEON_3R_VMAX:
|
||||
if (u) {
|
||||
tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
} else {
|
||||
tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
}
|
||||
return 0;
|
||||
case NEON_3R_VMIN:
|
||||
if (u) {
|
||||
tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
} else {
|
||||
tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
|
||||
vec_size, vec_size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (size == 3) {
|
||||
@ -6382,24 +6543,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
neon_load_reg64(cpu_V0, rn + pass);
|
||||
neon_load_reg64(cpu_V1, rm + pass);
|
||||
switch (op) {
|
||||
case NEON_3R_VQADD:
|
||||
if (u) {
|
||||
gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
} else {
|
||||
gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
}
|
||||
break;
|
||||
case NEON_3R_VQSUB:
|
||||
if (u) {
|
||||
gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
} else {
|
||||
gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
|
||||
cpu_V0, cpu_V1);
|
||||
}
|
||||
break;
|
||||
case NEON_3R_VSHL:
|
||||
if (u) {
|
||||
gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
|
||||
@ -6515,18 +6658,12 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
case NEON_3R_VHADD:
|
||||
GEN_NEON_INTEGER_OP(hadd);
|
||||
break;
|
||||
case NEON_3R_VQADD:
|
||||
GEN_NEON_INTEGER_OP_ENV(qadd);
|
||||
break;
|
||||
case NEON_3R_VRHADD:
|
||||
GEN_NEON_INTEGER_OP(rhadd);
|
||||
break;
|
||||
case NEON_3R_VHSUB:
|
||||
GEN_NEON_INTEGER_OP(hsub);
|
||||
break;
|
||||
case NEON_3R_VQSUB:
|
||||
GEN_NEON_INTEGER_OP_ENV(qsub);
|
||||
break;
|
||||
case NEON_3R_VSHL:
|
||||
GEN_NEON_INTEGER_OP(shl);
|
||||
break;
|
||||
@ -6539,12 +6676,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
|
||||
case NEON_3R_VQRSHL:
|
||||
GEN_NEON_INTEGER_OP_ENV(qrshl);
|
||||
break;
|
||||
case NEON_3R_VMAX:
|
||||
GEN_NEON_INTEGER_OP(max);
|
||||
break;
|
||||
case NEON_3R_VMIN:
|
||||
GEN_NEON_INTEGER_OP(min);
|
||||
break;
|
||||
case NEON_3R_VABD:
|
||||
GEN_NEON_INTEGER_OP(abd);
|
||||
break;
|
||||
@ -13634,7 +13765,7 @@ void arm_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf,
|
||||
i * 2 + 1, (uint32_t)(v >> 32),
|
||||
i, v);
|
||||
}
|
||||
cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
|
||||
cpu_fprintf(f, "FPSCR: %08x\n", vfp_get_fpscr(env));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -214,6 +214,10 @@ extern const GVecGen2i ssra_op[4];
extern const GVecGen2i usra_op[4];
extern const GVecGen2i sri_op[4];
extern const GVecGen2i sli_op[4];
extern const GVecGen4 uqadd_op[4];
extern const GVecGen4 sqadd_op[4];
extern const GVecGen4 uqsub_op[4];
extern const GVecGen4 sqsub_op[4];
void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);

/*
@@ -36,7 +36,7 @@
#define H4(x) (x)
#endif

#define SET_QC() env->vfp.xregs[ARM_VFP_FPSCR] |= CPSR_Q
#define SET_QC() env->vfp.qc[0] = 1

static void clear_tail(void *vd, uintptr_t opr_sz, uintptr_t max_sz)
{
@@ -638,6 +638,7 @@ void HELPER(NAME)(void *vd, void *vn, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
d[i] = FUNC(n[i], stat); \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}

DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
@@ -688,6 +689,7 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, void *stat, uint32_t desc) \
for (i = 0; i < oprsz / sizeof(TYPE); i++) { \
d[i] = FUNC(n[i], m[i], stat); \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}

DO_3OP(gvec_fadd_h, float16_add, float16)
@@ -766,3 +768,133 @@ DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
DO_FMLA_IDX(gvec_fmla_idx_d, float64, )

#undef DO_FMLA_IDX

#define DO_SAT(NAME, WTYPE, TYPEN, TYPEM, OP, MIN, MAX) \
void HELPER(NAME)(void *vd, void *vq, void *vn, void *vm, uint32_t desc) \
{ \
intptr_t i, oprsz = simd_oprsz(desc); \
TYPEN *d = vd, *n = vn; TYPEM *m = vm; \
bool q = false; \
for (i = 0; i < oprsz / sizeof(TYPEN); i++) { \
WTYPE dd = (WTYPE)n[i] OP m[i]; \
if (dd < MIN) { \
dd = MIN; \
q = true; \
} else if (dd > MAX) { \
dd = MAX; \
q = true; \
} \
d[i] = dd; \
} \
if (q) { \
uint32_t *qc = vq; \
qc[0] = 1; \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}

DO_SAT(gvec_uqadd_b, int, uint8_t, uint8_t, +, 0, UINT8_MAX)
DO_SAT(gvec_uqadd_h, int, uint16_t, uint16_t, +, 0, UINT16_MAX)
DO_SAT(gvec_uqadd_s, int64_t, uint32_t, uint32_t, +, 0, UINT32_MAX)

DO_SAT(gvec_sqadd_b, int, int8_t, int8_t, +, INT8_MIN, INT8_MAX)
DO_SAT(gvec_sqadd_h, int, int16_t, int16_t, +, INT16_MIN, INT16_MAX)
DO_SAT(gvec_sqadd_s, int64_t, int32_t, int32_t, +, INT32_MIN, INT32_MAX)

DO_SAT(gvec_uqsub_b, int, uint8_t, uint8_t, -, 0, UINT8_MAX)
DO_SAT(gvec_uqsub_h, int, uint16_t, uint16_t, -, 0, UINT16_MAX)
DO_SAT(gvec_uqsub_s, int64_t, uint32_t, uint32_t, -, 0, UINT32_MAX)

DO_SAT(gvec_sqsub_b, int, int8_t, int8_t, -, INT8_MIN, INT8_MAX)
DO_SAT(gvec_sqsub_h, int, int16_t, int16_t, -, INT16_MIN, INT16_MAX)
DO_SAT(gvec_sqsub_s, int64_t, int32_t, int32_t, -, INT32_MIN, INT32_MAX)

#undef DO_SAT

void HELPER(gvec_uqadd_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
uint64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
uint64_t nn = n[i], mm = m[i], dd = nn + mm;
if (dd < nn) {
dd = UINT64_MAX;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}

void HELPER(gvec_uqsub_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
uint64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
uint64_t nn = n[i], mm = m[i], dd = nn - mm;
if (nn < mm) {
dd = 0;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}

void HELPER(gvec_sqadd_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
int64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
int64_t nn = n[i], mm = m[i], dd = nn + mm;
if (((dd ^ nn) & ~(nn ^ mm)) & INT64_MIN) {
dd = (nn >> 63) ^ ~INT64_MIN;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}

void HELPER(gvec_sqsub_d)(void *vd, void *vq, void *vn,
void *vm, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
int64_t *d = vd, *n = vn, *m = vm;
bool q = false;

for (i = 0; i < oprsz / 8; i++) {
int64_t nn = n[i], mm = m[i], dd = nn - mm;
if (((dd ^ nn) & (nn ^ mm)) & INT64_MIN) {
dd = (nn >> 63) ^ ~INT64_MIN;
q = true;
}
d[i] = dd;
}
if (q) {
uint32_t *qc = vq;
qc[0] = 1;
}
clear_tail(d, oprsz, simd_maxsz(desc));
}