diff --git a/accel/tcg/atomic_template.h b/accel/tcg/atomic_template.h
index e022df4571..3f41ef2782 100644
--- a/accel/tcg/atomic_template.h
+++ b/accel/tcg/atomic_template.h
@@ -25,18 +25,22 @@
 #elif DATA_SIZE == 8
 # define SUFFIX     q
 # define DATA_TYPE  uint64_t
+# define SDATA_TYPE int64_t
 # define BSWAP      bswap64
 #elif DATA_SIZE == 4
 # define SUFFIX     l
 # define DATA_TYPE  uint32_t
+# define SDATA_TYPE int32_t
 # define BSWAP      bswap32
 #elif DATA_SIZE == 2
 # define SUFFIX     w
 # define DATA_TYPE  uint16_t
+# define SDATA_TYPE int16_t
 # define BSWAP      bswap16
 #elif DATA_SIZE == 1
 # define SUFFIX     b
 # define DATA_TYPE  uint8_t
+# define SDATA_TYPE int8_t
 # define BSWAP
 #else
 # error unsupported data size
@@ -118,6 +122,39 @@ GEN_ATOMIC_HELPER(or_fetch)
 GEN_ATOMIC_HELPER(xor_fetch)
 
 #undef GEN_ATOMIC_HELPER
+
+/* These helpers are, as a whole, full barriers. Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,      \
+                        ABI_TYPE xval EXTRA_ARGS)                   \
+{                                                                   \
+    ATOMIC_MMU_DECLS;                                               \
+    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
+    XDATA_TYPE cmp, old, new, val = xval;                           \
+    smp_mb();                                                       \
+    cmp = atomic_read__nocheck(haddr);                              \
+    do {                                                            \
+        old = cmp; new = FN(old, val);                              \
+        cmp = atomic_cmpxchg__nocheck(haddr, old, new);             \
+    } while (cmp != old);                                           \
+    ATOMIC_MMU_CLEANUP;                                             \
+    return RET;                                                     \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+
+#undef GEN_ATOMIC_HELPER_FN
 #endif /* DATA SIZE >= 16 */
 
 #undef END
@@ -192,47 +229,45 @@ GEN_ATOMIC_HELPER(xor_fetch)
 
 #undef GEN_ATOMIC_HELPER
 
+/* These helpers are, as a whole, full barriers. Within the helper,
+ * the leading barrier is explicit and the trailing barrier is within
+ * cmpxchg primitive.
+ */
+#define GEN_ATOMIC_HELPER_FN(X, FN, XDATA_TYPE, RET)                \
+ABI_TYPE ATOMIC_NAME(X)(CPUArchState *env, target_ulong addr,      \
+                        ABI_TYPE xval EXTRA_ARGS)                   \
+{                                                                   \
+    ATOMIC_MMU_DECLS;                                               \
+    XDATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;                          \
+    XDATA_TYPE ldo, ldn, old, new, val = xval;                      \
+    smp_mb();                                                       \
+    ldn = atomic_read__nocheck(haddr);                              \
+    do {                                                            \
+        ldo = ldn; old = BSWAP(ldo); new = FN(old, val);            \
+        ldn = atomic_cmpxchg__nocheck(haddr, ldo, BSWAP(new));      \
+    } while (ldo != ldn);                                           \
+    ATOMIC_MMU_CLEANUP;                                             \
+    return RET;                                                     \
+}
+
+GEN_ATOMIC_HELPER_FN(fetch_smin, MIN, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umin, MIN, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_smax, MAX, SDATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(fetch_umax, MAX, DATA_TYPE, old)
+
+GEN_ATOMIC_HELPER_FN(smin_fetch, MIN, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umin_fetch, MIN, DATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(smax_fetch, MAX, SDATA_TYPE, new)
+GEN_ATOMIC_HELPER_FN(umax_fetch, MAX, DATA_TYPE, new)
+
 /* Note that for addition, we need to use a separate cmpxchg loop instead
    of bswaps for the reverse-host-endian helpers.  */
-ABI_TYPE ATOMIC_NAME(fetch_add)(CPUArchState *env, target_ulong addr,
-                                ABI_TYPE val EXTRA_ARGS)
-{
-    ATOMIC_MMU_DECLS;
-    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
-    DATA_TYPE ldo, ldn, ret, sto;
+#define ADD(X, Y)   (X + Y)
+GEN_ATOMIC_HELPER_FN(fetch_add, ADD, DATA_TYPE, old)
+GEN_ATOMIC_HELPER_FN(add_fetch, ADD, DATA_TYPE, new)
+#undef ADD
 
-    ldo = atomic_read__nocheck(haddr);
-    while (1) {
-        ret = BSWAP(ldo);
-        sto = BSWAP(ret + val);
-        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
-        if (ldn == ldo) {
-            ATOMIC_MMU_CLEANUP;
-            return ret;
-        }
-        ldo = ldn;
-    }
-}
-
-ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
-                                ABI_TYPE val EXTRA_ARGS)
-{
-    ATOMIC_MMU_DECLS;
-    DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP;
-    DATA_TYPE ldo, ldn, ret, sto;
-
-    ldo = atomic_read__nocheck(haddr);
-    while (1) {
-        ret = BSWAP(ldo) + val;
-        sto = BSWAP(ret);
-        ldn = atomic_cmpxchg__nocheck(haddr, ldo, sto);
-        if (ldn == ldo) {
-            ATOMIC_MMU_CLEANUP;
-            return ret;
-        }
-        ldo = ldn;
-    }
-}
+#undef GEN_ATOMIC_HELPER_FN
 #endif /* DATA_SIZE >= 16 */
 
 #undef END
@@ -241,5 +276,6 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr,
 #undef BSWAP
 #undef ABI_TYPE
 #undef DATA_TYPE
+#undef SDATA_TYPE
 #undef SUFFIX
 #undef DATA_SIZE
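The generated helpers above all share one shape: load, apply FN, try to publish the result with a compare-and-swap, and retry if another CPU raced. A minimal user-space sketch of that loop, using the GCC/Clang __atomic builtins rather than QEMU's atomic_* wrappers (the function name and fixed types are illustrative only):

#include <stdint.h>

/* Sketch of the loop GEN_ATOMIC_HELPER_FN expands to, for FN = MAX on an
 * unsigned 32-bit value. fetch_umax returns the old value; umax_fetch
 * would return the new one instead. */
static uint32_t fetch_umax_u32(uint32_t *haddr, uint32_t val)
{
    uint32_t cmp = __atomic_load_n(haddr, __ATOMIC_RELAXED);
    uint32_t old, new;

    do {
        old = cmp;                      /* candidate old value */
        new = old > val ? old : val;    /* FN(old, val)        */
        /* On failure, cmp is refreshed with the value found in memory. */
    } while (!__atomic_compare_exchange_n(haddr, &cmp, new, false,
                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST));
    return old;
}

The reverse-host-endian variant differs only in that FN is applied to BSWAP(old) and the result is stored back byte-swapped; that is also why fetch_add is expressed through the same cmpxchg loop there rather than a host atomic add.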
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 2536959a18..1bd39d136d 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -125,11 +125,19 @@ GEN_ATOMIC_HELPERS(fetch_add)
 GEN_ATOMIC_HELPERS(fetch_and)
 GEN_ATOMIC_HELPERS(fetch_or)
 GEN_ATOMIC_HELPERS(fetch_xor)
+GEN_ATOMIC_HELPERS(fetch_smin)
+GEN_ATOMIC_HELPERS(fetch_umin)
+GEN_ATOMIC_HELPERS(fetch_smax)
+GEN_ATOMIC_HELPERS(fetch_umax)
 
 GEN_ATOMIC_HELPERS(add_fetch)
 GEN_ATOMIC_HELPERS(and_fetch)
 GEN_ATOMIC_HELPERS(or_fetch)
 GEN_ATOMIC_HELPERS(xor_fetch)
+GEN_ATOMIC_HELPERS(smin_fetch)
+GEN_ATOMIC_HELPERS(umin_fetch)
+GEN_ATOMIC_HELPERS(smax_fetch)
+GEN_ATOMIC_HELPERS(umax_fetch)
 
 GEN_ATOMIC_HELPERS(xchg)
 
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 70e0c40a1c..8401b37bd4 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -602,34 +602,42 @@ static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
 static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
                                   bool inf_zero, float_status *s)
 {
+    int which;
+
     if (is_snan(a.cls) || is_snan(b.cls) || is_snan(c.cls)) {
         s->float_exception_flags |= float_flag_invalid;
     }
 
-    if (s->default_nan_mode) {
-        a.cls = float_class_dnan;
-    } else {
-        switch (pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
-                              is_qnan(b.cls), is_snan(b.cls),
-                              is_qnan(c.cls), is_snan(c.cls),
-                              inf_zero, s)) {
-        case 0:
-            break;
-        case 1:
-            a = b;
-            break;
-        case 2:
-            a = c;
-            break;
-        case 3:
-            a.cls = float_class_dnan;
-            return a;
-        default:
-            g_assert_not_reached();
-        }
+    which = pickNaNMulAdd(is_qnan(a.cls), is_snan(a.cls),
+                          is_qnan(b.cls), is_snan(b.cls),
+                          is_qnan(c.cls), is_snan(c.cls),
+                          inf_zero, s);
 
-        a.cls = float_class_msnan;
+    if (s->default_nan_mode) {
+        /* Note that this check is after pickNaNMulAdd so that function
+         * has an opportunity to set the Invalid flag.
+         */
+        a.cls = float_class_dnan;
+        return a;
     }
+
+    switch (which) {
+    case 0:
+        break;
+    case 1:
+        a = b;
+        break;
+    case 2:
+        a = c;
+        break;
+    case 3:
+        a.cls = float_class_dnan;
+        return a;
+    default:
+        g_assert_not_reached();
+    }
+    a.cls = float_class_msnan;
+    return a;
 }
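The subtle point in the pick_nan_muladd() rewrite is ordering: pickNaNMulAdd() can raise float_flag_invalid (for the inf * 0 + NaN case) and must therefore run even when default-NaN mode will discard its answer. A reduced, self-contained sketch of the resulting control flow, with softfloat's types pared down to stand-ins (all names here are simplifications, not softfloat API):

#include <stdbool.h>

/* Stand-in for float_status; only the fields this path uses. */
struct status { bool default_nan_mode; bool invalid; };

/* Reduced pickNaNMulAdd(): may set the Invalid flag and returns which
 * NaN to propagate (3 = use the default NaN). */
static int pick_which(bool inf_zero, struct status *s)
{
    if (inf_zero) {
        s->invalid = true;   /* must happen even in default-NaN mode */
        return 3;
    }
    return 0;                /* keep operand a's NaN */
}

/* Returns true when the default NaN is the result. */
static bool pick_nan_muladd_sketch(bool inf_zero, struct status *s)
{
    int which = pick_which(inf_zero, s);   /* step 1: always run the picker */

    if (s->default_nan_mode) {             /* step 2: mode check only after */
        return true;
    }
    return which == 3;
}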
diff --git a/hw/arm/boot.c b/hw/arm/boot.c
index 1e2be20731..9496f331a8 100644
--- a/hw/arm/boot.c
+++ b/hw/arm/boot.c
@@ -36,8 +36,8 @@
 #define ARM64_TEXT_OFFSET_OFFSET    8
 #define ARM64_MAGIC_OFFSET          56
 
-static AddressSpace *arm_boot_address_space(ARMCPU *cpu,
-                                            const struct arm_boot_info *info)
+AddressSpace *arm_boot_address_space(ARMCPU *cpu,
+                                     const struct arm_boot_info *info)
 {
     /* Return the address space to use for bootloader reads and writes.
      * We prefer the secure address space if the CPU has it and we're
@@ -486,29 +486,8 @@ static void fdt_add_psci_node(void *fdt)
     qemu_fdt_setprop_cell(fdt, "/psci", "migrate", migrate_fn);
 }
 
-/**
- * load_dtb() - load a device tree binary image into memory
- * @addr:       the address to load the image at
- * @binfo:      struct describing the boot environment
- * @addr_limit: upper limit of the available memory area at @addr
- * @as:         address space to load image to
- *
- * Load a device tree supplied by the machine or by the user with the
- * '-dtb' command line option, and put it at offset @addr in target
- * memory.
- *
- * If @addr_limit contains a meaningful value (i.e., it is strictly greater
- * than @addr), the device tree is only loaded if its size does not exceed
- * the limit.
- *
- * Returns: the size of the device tree image on success,
- *          0 if the image size exceeds the limit,
- *          -1 on errors.
- *
- * Note: Must not be called unless have_dtb(binfo) is true.
- */
-static int load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
-                    hwaddr addr_limit, AddressSpace *as)
+int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
+                 hwaddr addr_limit, AddressSpace *as)
 {
     void *fdt = NULL;
     int size, rc;
@@ -935,7 +914,7 @@ static uint64_t load_aarch64_image(const char *filename, hwaddr mem_base,
     return size;
 }
 
-static void arm_load_kernel_notify(Notifier *notifier, void *data)
+void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
 {
     CPUState *cs;
     int kernel_size;
@@ -945,11 +924,6 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
     int elf_machine;
     hwaddr entry;
     static const ARMInsnFixup *primary_loader;
-    ArmLoadKernelNotifier *n = DO_UPCAST(ArmLoadKernelNotifier,
-                                         notifier, notifier);
-    ARMCPU *cpu = n->cpu;
-    struct arm_boot_info *info =
-        container_of(n, struct arm_boot_info, load_kernel_notifier);
     AddressSpace *as = arm_boot_address_space(cpu, info);
 
     /* The board code is not supposed to set secure_board_setup unless
@@ -959,6 +933,7 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
     assert(!(info->secure_board_setup && kvm_enabled()));
 
     info->dtb_filename = qemu_opt_get(qemu_get_machine_opts(), "dtb");
+    info->dtb_limit = 0;
 
     /* Load the kernel.  */
    if (!info->kernel_filename || info->firmware_loaded) {
@@ -968,9 +943,7 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
              * the kernel is supposed to be loaded by the bootloader), copy the
              * DTB to the base of RAM for the bootloader to pick up.
              */
-            if (load_dtb(info->loader_start, info, 0, as) < 0) {
-                exit(1);
-            }
+            info->dtb_start = info->loader_start;
         }
 
         if (info->kernel_filename) {
@@ -1050,15 +1023,14 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
          */
         if (elf_low_addr > info->loader_start
            || elf_high_addr < info->loader_start) {
-            /* Pass elf_low_addr as address limit to load_dtb if it may be
+            /* Set elf_low_addr as address limit for arm_load_dtb if it may be
              * pointing into RAM, otherwise pass '0' (no limit) */
             if (elf_low_addr < info->loader_start) {
                 elf_low_addr = 0;
             }
-            if (load_dtb(info->loader_start, info, elf_low_addr, as) < 0) {
-                exit(1);
-            }
+            info->dtb_start = info->loader_start;
+            info->dtb_limit = elf_low_addr;
         }
     }
     entry = elf_entry;
@@ -1116,7 +1088,6 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
      */
     if (have_dtb(info)) {
         hwaddr align;
-        hwaddr dtb_start;
 
         if (elf_machine == EM_AARCH64) {
             /*
@@ -1136,11 +1107,9 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
         }
 
         /* Place the DTB after the initrd in memory with alignment. */
-        dtb_start = QEMU_ALIGN_UP(info->initrd_start + initrd_size, align);
-        if (load_dtb(dtb_start, info, 0, as) < 0) {
-            exit(1);
-        }
-        fixupcontext[FIXUP_ARGPTR] = dtb_start;
+        info->dtb_start = QEMU_ALIGN_UP(info->initrd_start + initrd_size,
+                                        align);
+        fixupcontext[FIXUP_ARGPTR] = info->dtb_start;
     } else {
         fixupcontext[FIXUP_ARGPTR] = info->loader_start + KERNEL_ARGS_ADDR;
         if (info->ram_size >= (1ULL << 32)) {
@@ -1173,15 +1142,6 @@ static void arm_load_kernel_notify(Notifier *notifier, void *data)
     for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
         ARM_CPU(cs)->env.boot_info = info;
     }
-}
-
-void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
-{
-    CPUState *cs;
-
-    info->load_kernel_notifier.cpu = cpu;
-    info->load_kernel_notifier.notifier.notify = arm_load_kernel_notify;
-    qemu_add_machine_init_done_notifier(&info->load_kernel_notifier.notifier);
 
     /* CPU objects (unlike devices) are not automatically reset on system
      * reset, so we must always register a handler to do so. If we're
@@ -1191,6 +1151,12 @@ void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info)
     for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
         qemu_register_reset(do_cpu_reset, ARM_CPU(cs));
     }
+
+    if (!info->skip_dtb_autoload && have_dtb(info)) {
+        if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
+            exit(1);
+        }
+    }
 }
 
 static const TypeInfo arm_linux_boot_if_info = {
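The new boot.c contract, in short: arm_load_kernel() computes dtb_start/dtb_limit and, unless skip_dtb_autoload is set, loads the DTB itself; a board that must add FDT nodes after all devices exist sets the flag and calls arm_load_dtb() from its own machine-init-done notifier. A sketch of that pattern, assuming a hypothetical board (MyBoardState and the my_* names are illustrative, not part of this series; the calls themselves are the APIs introduced above):

/* Hypothetical board using skip_dtb_autoload; this mirrors what
 * hw/arm/virt.c does further below in this series. */
static void my_machine_done(Notifier *notifier, void *data)
{
    MyBoardState *s = container_of(notifier, MyBoardState, machine_done);
    struct arm_boot_info *info = &s->bootinfo;
    AddressSpace *as = arm_boot_address_space(s->cpu, info);

    /* ... add late FDT nodes (e.g. dynamic sysbus devices) here ... */

    if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
        exit(1);
    }
}

static void my_board_init(MyBoardState *s)
{
    s->bootinfo.skip_dtb_autoload = true;   /* board will load the DTB  */
    arm_load_kernel(s->cpu, &s->bootinfo);  /* fills dtb_start/dtb_limit */

    s->machine_done.notify = my_machine_done;
    qemu_add_machine_init_done_notifier(&s->machine_done);
}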
diff --git a/hw/arm/iotkit.c b/hw/arm/iotkit.c
index c5f0a5b98a..234185e8f7 100644
--- a/hw/arm/iotkit.c
+++ b/hw/arm/iotkit.c
@@ -517,6 +517,7 @@ static void iotkit_realize(DeviceState *dev, Error **errp)
                            qdev_get_gpio_in(DEVICE(&s->ppc_irq_orgate), i));
         qdev_connect_gpio_out_named(DEVICE(ppc), "irq", 0,
                                     qdev_get_gpio_in(devs, 0));
+        g_free(gpioname);
     }
 
     iotkit_forward_sec_resp_cfg(s);
diff --git a/hw/arm/sysbus-fdt.c b/hw/arm/sysbus-fdt.c
index d68e3dcdbd..e4c492ea44 100644
--- a/hw/arm/sysbus-fdt.c
+++ b/hw/arm/sysbus-fdt.c
@@ -49,15 +49,6 @@ typedef struct PlatformBusFDTData {
     PlatformBusDevice *pbus;
 } PlatformBusFDTData;
 
-/*
- * struct used when calling the machine init done notifier
- * that constructs the fdt nodes of platform bus devices
- */
-typedef struct PlatformBusFDTNotifierParams {
-    Notifier notifier;
-    ARMPlatformBusFDTParams *fdt_params;
-} PlatformBusFDTNotifierParams;
-
 /* struct that associates a device type name and a node creation function */
 typedef struct NodeCreationPair {
     const char *typename;
@@ -453,42 +444,17 @@ static void add_fdt_node(SysBusDevice *sbdev, void *opaque)
     exit(1);
 }
 
-/**
- * add_all_platform_bus_fdt_nodes - create all the platform bus nodes
- *
- * builds the parent platform bus node and all the nodes of dynamic
- * sysbus devices attached to it.
- */
-static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)
+void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
+                                    hwaddr bus_size, int irq_start)
 {
     const char platcomp[] = "qemu,platform\0simple-bus";
     PlatformBusDevice *pbus;
     DeviceState *dev;
     gchar *node;
-    uint64_t addr, size;
-    int irq_start, dtb_size;
-    struct arm_boot_info *info = fdt_params->binfo;
-    const ARMPlatformBusSystemParams *params = fdt_params->system_params;
-    const char *intc = fdt_params->intc;
-    void *fdt = info->get_dtb(info, &dtb_size);
-
-    /*
-     * If the user provided a dtb, we assume the dynamic sysbus nodes
-     * already are integrated there. This corresponds to a use case where
-     * the dynamic sysbus nodes are complex and their generation is not yet
-     * supported. In that case the user can take charge of the guest dt
-     * while qemu takes charge of the qom stuff.
-     */
-    if (info->dtb_filename) {
-        return;
-    }
 
     assert(fdt);
 
-    node = g_strdup_printf("/platform@%"PRIx64, params->platform_bus_base);
-    addr = params->platform_bus_base;
-    size = params->platform_bus_size;
-    irq_start = params->platform_bus_first_irq;
+    node = g_strdup_printf("/platform@%"PRIx64, addr);
 
     /* Create a /platform node that we can put all devices into */
 
     qemu_fdt_add_subnode(fdt, node);
@@ -499,16 +465,13 @@ static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)
      */
     qemu_fdt_setprop_cells(fdt, node, "#size-cells", 1);
     qemu_fdt_setprop_cells(fdt, node, "#address-cells", 1);
-    qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, size);
+    qemu_fdt_setprop_cells(fdt, node, "ranges", 0, addr >> 32, addr, bus_size);
 
     qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", intc);
 
     dev = qdev_find_recursive(sysbus_get_default(), TYPE_PLATFORM_BUS_DEVICE);
     pbus = PLATFORM_BUS_DEVICE(dev);
 
-    /* We can only create dt nodes for dynamic devices when they're ready */
-    assert(pbus->done_gathering);
-
     PlatformBusFDTData data = {
         .fdt = fdt,
         .irq_start = irq_start,
@@ -521,22 +484,3 @@ static void add_all_platform_bus_fdt_nodes(ARMPlatformBusFDTParams *fdt_params)
 
     g_free(node);
 }
-
-static void platform_bus_fdt_notify(Notifier *notifier, void *data)
-{
-    PlatformBusFDTNotifierParams *p = DO_UPCAST(PlatformBusFDTNotifierParams,
-                                                notifier, notifier);
-
-    add_all_platform_bus_fdt_nodes(p->fdt_params);
-    g_free(p->fdt_params);
-    g_free(p);
-}
-
-void arm_register_platform_bus_fdt_creator(ARMPlatformBusFDTParams *fdt_params)
-{
-    PlatformBusFDTNotifierParams *p = g_new(PlatformBusFDTNotifierParams, 1);
-
-    p->fdt_params = fdt_params;
-    p->notifier.notify = platform_bus_fdt_notify;
-    qemu_add_machine_init_done_notifier(&p->notifier);
-}
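With the notifier plumbing gone, a machine now calls the node builder directly once the machine is assembled, passing the bus geometry explicitly. A hedged usage sketch (the wrapper and the literal values are illustrative; virt passes its memmap/irqmap entries, as the hunks below show):

/* Called from a machine-done context, after all sysbus devices exist and
 * before the DTB is loaded into guest memory. Geometry values illustrative. */
static void add_platform_nodes_example(void *fdt)
{
    platform_bus_add_all_fdt_nodes(fdt, "/intc",
                                   0x0c000000,  /* bus base address */
                                   0x02000000,  /* bus window size  */
                                   112);        /* first bus IRQ    */
}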
@@ -1126,39 +1124,23 @@ static void create_platform_bus(VirtMachineState *vms, qemu_irq *pic) DeviceState *dev; SysBusDevice *s; int i; - ARMPlatformBusFDTParams *fdt_params = g_new(ARMPlatformBusFDTParams, 1); MemoryRegion *sysmem = get_system_memory(); - platform_bus_params.platform_bus_base = vms->memmap[VIRT_PLATFORM_BUS].base; - platform_bus_params.platform_bus_size = vms->memmap[VIRT_PLATFORM_BUS].size; - platform_bus_params.platform_bus_first_irq = vms->irqmap[VIRT_PLATFORM_BUS]; - platform_bus_params.platform_bus_num_irqs = PLATFORM_BUS_NUM_IRQS; - - fdt_params->system_params = &platform_bus_params; - fdt_params->binfo = &vms->bootinfo; - fdt_params->intc = "/intc"; - /* - * register a machine init done notifier that creates the device tree - * nodes of the platform bus and its children dynamic sysbus devices - */ - arm_register_platform_bus_fdt_creator(fdt_params); - dev = qdev_create(NULL, TYPE_PLATFORM_BUS_DEVICE); dev->id = TYPE_PLATFORM_BUS_DEVICE; - qdev_prop_set_uint32(dev, "num_irqs", - platform_bus_params.platform_bus_num_irqs); - qdev_prop_set_uint32(dev, "mmio_size", - platform_bus_params.platform_bus_size); + qdev_prop_set_uint32(dev, "num_irqs", PLATFORM_BUS_NUM_IRQS); + qdev_prop_set_uint32(dev, "mmio_size", vms->memmap[VIRT_PLATFORM_BUS].size); qdev_init_nofail(dev); - s = SYS_BUS_DEVICE(dev); + vms->platform_bus_dev = dev; - for (i = 0; i < platform_bus_params.platform_bus_num_irqs; i++) { - int irqn = platform_bus_params.platform_bus_first_irq + i; + s = SYS_BUS_DEVICE(dev); + for (i = 0; i < PLATFORM_BUS_NUM_IRQS; i++) { + int irqn = vms->irqmap[VIRT_PLATFORM_BUS] + i; sysbus_connect_irq(s, i, pic[irqn]); } memory_region_add_subregion(sysmem, - platform_bus_params.platform_bus_base, + vms->memmap[VIRT_PLATFORM_BUS].base, sysbus_mmio_get_region(s, 0)); } @@ -1229,6 +1211,26 @@ void virt_machine_done(Notifier *notifier, void *data) { VirtMachineState *vms = container_of(notifier, VirtMachineState, machine_done); + ARMCPU *cpu = ARM_CPU(first_cpu); + struct arm_boot_info *info = &vms->bootinfo; + AddressSpace *as = arm_boot_address_space(cpu, info); + + /* + * If the user provided a dtb, we assume the dynamic sysbus nodes + * already are integrated there. This corresponds to a use case where + * the dynamic sysbus nodes are complex and their generation is not yet + * supported. In that case the user can take charge of the guest dt + * while qemu takes charge of the qom stuff. 
@@ -1229,6 +1211,26 @@ void virt_machine_done(Notifier *notifier, void *data)
 {
     VirtMachineState *vms = container_of(notifier, VirtMachineState,
                                          machine_done);
+    ARMCPU *cpu = ARM_CPU(first_cpu);
+    struct arm_boot_info *info = &vms->bootinfo;
+    AddressSpace *as = arm_boot_address_space(cpu, info);
+
+    /*
+     * If the user provided a dtb, we assume the dynamic sysbus nodes
+     * already are integrated there. This corresponds to a use case where
+     * the dynamic sysbus nodes are complex and their generation is not yet
+     * supported. In that case the user can take charge of the guest dt
+     * while qemu takes charge of the qom stuff.
+     */
+    if (info->dtb_filename == NULL) {
+        platform_bus_add_all_fdt_nodes(vms->fdt, "/intc",
+                                       vms->memmap[VIRT_PLATFORM_BUS].base,
+                                       vms->memmap[VIRT_PLATFORM_BUS].size,
+                                       vms->irqmap[VIRT_PLATFORM_BUS]);
+    }
+    if (arm_load_dtb(info->dtb_start, info, info->dtb_limit, as) < 0) {
+        exit(1);
+    }
 
     virt_acpi_setup(vms);
     virt_build_smbios(vms);
@@ -1456,8 +1458,7 @@ static void machvirt_init(MachineState *machine)
     vms->fw_cfg = create_fw_cfg(vms, &address_space_memory);
     rom_set_fw(vms->fw_cfg);
 
-    vms->machine_done.notify = virt_machine_done;
-    qemu_add_machine_init_done_notifier(&vms->machine_done);
+    create_platform_bus(vms, pic);
 
     vms->bootinfo.ram_size = machine->ram_size;
     vms->bootinfo.kernel_filename = machine->kernel_filename;
@@ -1467,16 +1468,12 @@ static void machvirt_init(MachineState *machine)
     vms->bootinfo.board_id = -1;
     vms->bootinfo.loader_start = vms->memmap[VIRT_MEM].base;
     vms->bootinfo.get_dtb = machvirt_dtb;
+    vms->bootinfo.skip_dtb_autoload = true;
     vms->bootinfo.firmware_loaded = firmware_loaded;
     arm_load_kernel(ARM_CPU(first_cpu), &vms->bootinfo);
 
-    /*
-     * arm_load_kernel machine init done notifier registration must
-     * happen before the platform_bus_create call. In this latter,
-     * another notifier is registered which adds platform bus nodes.
-     * Notifiers are executed in registration reverse order.
-     */
-    create_platform_bus(vms, pic);
+    vms->machine_done.notify = virt_machine_done;
+    qemu_add_machine_init_done_notifier(&vms->machine_done);
 }
 
 static bool virt_get_secure(Object *obj, Error **errp)
@@ -1627,9 +1624,33 @@ static const CPUArchIdList *virt_possible_cpu_arch_ids(MachineState *ms)
     return ms->possible_cpus;
 }
 
+static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev,
+                                        DeviceState *dev, Error **errp)
+{
+    VirtMachineState *vms = VIRT_MACHINE(hotplug_dev);
+
+    if (vms->platform_bus_dev) {
+        if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
+            platform_bus_link_device(PLATFORM_BUS_DEVICE(vms->platform_bus_dev),
+                                     SYS_BUS_DEVICE(dev));
+        }
+    }
+}
+
+static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine,
+                                                        DeviceState *dev)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
+        return HOTPLUG_HANDLER(machine);
+    }
+
+    return NULL;
+}
+
 static void virt_machine_class_init(ObjectClass *oc, void *data)
 {
     MachineClass *mc = MACHINE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
 
     mc->init = machvirt_init;
     /* Start max_cpus at the maximum QEMU supports. We'll further restrict
@@ -1648,6 +1669,9 @@ static void virt_machine_class_init(ObjectClass *oc, void *data)
     mc->cpu_index_to_instance_props = virt_cpu_index_to_props;
     mc->default_cpu_type = ARM_CPU_TYPE_NAME("cortex-a15");
     mc->get_default_cpu_node_id = virt_get_default_cpu_node_id;
+    assert(!mc->get_hotplug_handler);
+    mc->get_hotplug_handler = virt_machine_get_hotplug_handler;
+    hc->plug = virt_machine_device_plug_cb;
 }
 
 static const TypeInfo virt_machine_info = {
@@ -1657,6 +1681,10 @@ static const TypeInfo virt_machine_info = {
     .instance_size = sizeof(VirtMachineState),
     .class_size    = sizeof(VirtMachineClass),
     .class_init    = virt_machine_class_init,
+    .interfaces = (InterfaceInfo[]) {
+         { TYPE_HOTPLUG_HANDLER },
+         { }
+    },
 };
 
 static void machvirt_machine_init(void)
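The net effect of the virt changes above is that the initialization order is now explicit in machvirt_init() instead of being encoded in reverse notifier registration order. In outline (summarizing only what the hunks above do):

/*
 * machvirt_init()
 *   create_platform_bus()        -> bus device exists, vms->platform_bus_dev
 *   arm_load_kernel()            -> computes dtb_start; skip_dtb_autoload
 *                                   defers the actual DTB load
 *   register vms->machine_done   -> runs once every device is realized
 *
 * virt_machine_done()
 *   platform_bus_add_all_fdt_nodes()  (only when the user gave no -dtb)
 *   arm_load_dtb()                    single, final DTB load
 *   virt_acpi_setup(), virt_build_smbios()
 */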
diff --git a/hw/core/platform-bus.c b/hw/core/platform-bus.c
index 33d32fbf22..807cb5ccda 100644
--- a/hw/core/platform-bus.c
+++ b/hw/core/platform-bus.c
@@ -103,7 +103,6 @@ static void plaform_bus_refresh_irqs(PlatformBusDevice *pbus)
 {
     bitmap_zero(pbus->used_irqs, pbus->num_irqs);
     foreach_dynamic_sysbus_device(platform_bus_count_irqs, pbus);
-    pbus->done_gathering = true;
 }
 
 static void platform_bus_map_irq(PlatformBusDevice *pbus, SysBusDevice *sbdev,
@@ -163,12 +162,11 @@ static void platform_bus_map_mmio(PlatformBusDevice *pbus, SysBusDevice *sbdev,
 }
 
 /*
- * For each sysbus device, look for unassigned IRQ lines as well as
- * unassociated MMIO regions. Connect them to the platform bus if available.
+ * Look for unassigned IRQ lines as well as unassociated MMIO regions.
+ * Connect them to the platform bus if available.
  */
-static void link_sysbus_device(SysBusDevice *sbdev, void *opaque)
+void platform_bus_link_device(PlatformBusDevice *pbus, SysBusDevice *sbdev)
 {
-    PlatformBusDevice *pbus = opaque;
     int i;
 
     for (i = 0; sysbus_has_irq(sbdev, i); i++) {
@@ -180,19 +178,6 @@ static void link_sysbus_device(SysBusDevice *sbdev, void *opaque)
     }
 }
 
-static void platform_bus_init_notify(Notifier *notifier, void *data)
-{
-    PlatformBusDevice *pb = container_of(notifier, PlatformBusDevice, notifier);
-
-    /*
-     * Generate a bitmap of used IRQ lines, as the user might have specified
-     * them on the command line.
-     */
-    plaform_bus_refresh_irqs(pb);
-
-    foreach_dynamic_sysbus_device(link_sysbus_device, pb);
-}
-
 static void platform_bus_realize(DeviceState *dev, Error **errp)
 {
     PlatformBusDevice *pbus;
@@ -211,12 +196,8 @@ static void platform_bus_realize(DeviceState *dev, Error **errp)
         sysbus_init_irq(d, &pbus->irqs[i]);
     }
 
-    /*
-     * Register notifier that allows us to gather dangling devices once the
-     * machine is completely assembled
-     */
-    pbus->notifier.notify = platform_bus_init_notify;
-    qemu_add_machine_init_done_notifier(&pbus->notifier);
+    /* Some devices might have been initialized before this one,
+     * so update the map of used IRQs. */
+    plaform_bus_refresh_irqs(pbus);
 }
 
 static Property platform_bus_properties[] = {
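platform_bus_link_device() is now driven from the machine's hotplug handler rather than a machine-done notifier, so dynamically created sysbus devices get wired up as soon as they are realized. A condensed sketch of the callback shape the boards in this series install (MyMachineState/MY_MACHINE are illustrative; the rest are the APIs from these hunks):

/* Machine-level plug callback: route any dynamic sysbus device to the
 * platform bus, if this board created one. Condensed from the virt and
 * e500plat callbacks in this series. */
static void board_device_plug_cb(HotplugHandler *hotplug_dev,
                                 DeviceState *dev, Error **errp)
{
    MyMachineState *s = MY_MACHINE(hotplug_dev);    /* illustrative cast */

    if (s->platform_bus_dev &&
        object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
        platform_bus_link_device(PLATFORM_BUS_DEVICE(s->platform_bus_dev),
                                 SYS_BUS_DEVICE(dev));
    }
}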
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 868893d0a1..d768930d02 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -2051,15 +2051,12 @@ static void pc_machine_device_unplug_cb(HotplugHandler *hotplug_dev,
 static HotplugHandler *pc_get_hotpug_handler(MachineState *machine,
                                              DeviceState *dev)
 {
-    PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(machine);
-
     if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM) ||
         object_dynamic_cast(OBJECT(dev), TYPE_CPU)) {
         return HOTPLUG_HANDLER(machine);
     }
 
-    return pcmc->get_hotplug_handler ?
-           pcmc->get_hotplug_handler(machine, dev) : NULL;
+    return NULL;
 }
 
 static void
@@ -2339,7 +2336,6 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
     NMIClass *nc = NMI_CLASS(oc);
 
-    pcmc->get_hotplug_handler = mc->get_hotplug_handler;
     pcmc->pci_enabled = true;
     pcmc->has_acpi_build = true;
     pcmc->rsdp_in_ram = true;
@@ -2354,6 +2350,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data)
     pcmc->acpi_data_size = 0x20000 + 0x8000;
     pcmc->save_tsc_khz = true;
    pcmc->linuxboot_dma_enabled = true;
+    assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = pc_get_hotpug_handler;
     mc->cpu_index_to_instance_props = pc_cpu_index_to_props;
     mc->get_default_cpu_node_id = pc_get_default_cpu_node_id;
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 748a8d213b..826053edc8 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -222,16 +222,15 @@ static void sysbus_device_create_devtree(SysBusDevice *sbdev, void *opaque)
     }
 }
 
-static void platform_bus_create_devtree(const PPCE500MachineClass *pmc,
+static void platform_bus_create_devtree(PPCE500MachineState *pms,
                                         void *fdt, const char *mpic)
 {
+    const PPCE500MachineClass *pmc = PPCE500_MACHINE_GET_CLASS(pms);
     gchar *node = g_strdup_printf("/platform@%"PRIx64, pmc->platform_bus_base);
     const char platcomp[] = "qemu,platform\0simple-bus";
     uint64_t addr = pmc->platform_bus_base;
     uint64_t size = pmc->platform_bus_size;
     int irq_start = pmc->platform_bus_first_irq;
-    PlatformBusDevice *pbus;
-    DeviceState *dev;
 
     /* Create a /platform node that we can put all devices into */
 
@@ -246,22 +245,17 @@ static void platform_bus_create_devtree(const PPCE500MachineClass *pmc,
 
     qemu_fdt_setprop_phandle(fdt, node, "interrupt-parent", mpic);
 
-    dev = qdev_find_recursive(sysbus_get_default(), TYPE_PLATFORM_BUS_DEVICE);
-    pbus = PLATFORM_BUS_DEVICE(dev);
+    /* Create dt nodes for dynamic devices */
+    PlatformDevtreeData data = {
+        .fdt = fdt,
+        .mpic = mpic,
+        .irq_start = irq_start,
+        .node = node,
+        .pbus = pms->pbus_dev,
+    };
 
-    /* We can only create dt nodes for dynamic devices when they're ready */
-    if (pbus->done_gathering) {
-        PlatformDevtreeData data = {
-            .fdt = fdt,
-            .mpic = mpic,
-            .irq_start = irq_start,
-            .node = node,
-            .pbus = pbus,
-        };
-
-        /* Loop through all dynamic sysbus devices and create nodes for them */
-        foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &data);
-    }
+    /* Loop through all dynamic sysbus devices and create nodes for them */
+    foreach_dynamic_sysbus_device(sysbus_device_create_devtree, &data);
 
     g_free(node);
 }
@@ -533,8 +527,8 @@ static int ppce500_load_device_tree(PPCE500MachineState *pms,
     }
     g_free(soc);
 
-    if (pmc->has_platform_bus) {
-        platform_bus_create_devtree(pmc, fdt, mpic);
+    if (pms->pbus_dev) {
+        platform_bus_create_devtree(pms, fdt, mpic);
     }
     g_free(mpic);
 
@@ -953,8 +947,9 @@ void ppce500_init(MachineState *machine)
     qdev_prop_set_uint32(dev, "num_irqs", pmc->platform_bus_num_irqs);
     qdev_prop_set_uint32(dev, "mmio_size", pmc->platform_bus_size);
     qdev_init_nofail(dev);
-    s = SYS_BUS_DEVICE(dev);
+    pms->pbus_dev = PLATFORM_BUS_DEVICE(dev);
 
+    s = SYS_BUS_DEVICE(pms->pbus_dev);
     for (i = 0; i < pmc->platform_bus_num_irqs; i++) {
         int irqn = pmc->platform_bus_first_irq + i;
         sysbus_connect_irq(s, i, qdev_get_gpio_in(mpicdev, irqn));
@@ -1097,6 +1092,7 @@ static const TypeInfo ppce500_info = {
     .name          = TYPE_PPCE500_MACHINE,
     .parent        = TYPE_MACHINE,
     .abstract      = true,
+    .instance_size = sizeof(PPCE500MachineState),
     .class_size    = sizeof(PPCE500MachineClass),
 };
diff --git a/hw/ppc/e500.h b/hw/ppc/e500.h
index 621403bd24..3fd9f825ca 100644
--- a/hw/ppc/e500.h
+++ b/hw/ppc/e500.h
@@ -2,11 +2,16 @@
 #define PPCE500_H
 
 #include "hw/boards.h"
+#include "hw/platform-bus.h"
 
 typedef struct PPCE500MachineState {
     /*< private >*/
     MachineState parent_obj;
 
+    /* Points to the TYPE_PLATFORM_BUS_DEVICE instance, if the board
+     * supports dynamic sysbus devices.
+     */
+    PlatformBusDevice *pbus_dev;
 } PPCE500MachineState;
 
 typedef struct PPCE500MachineClass {
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index f69aadb666..d8e3f2066e 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -43,13 +43,41 @@ static void e500plat_init(MachineState *machine)
     ppce500_init(machine);
 }
 
+static void e500plat_machine_device_plug_cb(HotplugHandler *hotplug_dev,
+                                            DeviceState *dev, Error **errp)
+{
+    PPCE500MachineState *pms = PPCE500_MACHINE(hotplug_dev);
+
+    if (pms->pbus_dev) {
+        if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
+            platform_bus_link_device(pms->pbus_dev, SYS_BUS_DEVICE(dev));
+        }
+    }
+}
+
+static
+HotplugHandler *e500plat_machine_get_hotpug_handler(MachineState *machine,
+                                                    DeviceState *dev)
+{
+    if (object_dynamic_cast(OBJECT(dev), TYPE_SYS_BUS_DEVICE)) {
+        return HOTPLUG_HANDLER(machine);
+    }
+
+    return NULL;
+}
+
 #define TYPE_E500PLAT_MACHINE  MACHINE_TYPE_NAME("ppce500")
 
 static void e500plat_machine_class_init(ObjectClass *oc, void *data)
 {
     PPCE500MachineClass *pmc = PPCE500_MACHINE_CLASS(oc);
+    HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc);
     MachineClass *mc = MACHINE_CLASS(oc);
 
+    assert(!mc->get_hotplug_handler);
+    mc->get_hotplug_handler = e500plat_machine_get_hotpug_handler;
+    hc->plug = e500plat_machine_device_plug_cb;
+
     pmc->pci_first_slot = 0x1;
     pmc->pci_nr_slots = PCI_SLOT_MAX - 1;
     pmc->fixup_devtree = e500plat_fixup_devtree;
@@ -77,6 +105,10 @@ static const TypeInfo e500plat_info = {
     .name          = TYPE_E500PLAT_MACHINE,
     .parent        = TYPE_PPCE500_MACHINE,
     .class_init    = e500plat_machine_class_init,
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_HOTPLUG_HANDLER },
+        { }
+    }
 };
 
 static void e500plat_register_types(void)
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a1abcba6ad..ebf30dd60b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3980,6 +3980,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     mc->kvm_type = spapr_kvm_type;
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_SPAPR_PCI_HOST_BRIDGE);
     mc->pci_allow_0_address = true;
+    assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = spapr_get_hotplug_handler;
     hc->pre_plug = spapr_machine_device_pre_plug;
     hc->plug = spapr_machine_device_plug;
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 100dfdc96d..5796e24bd8 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -491,6 +491,7 @@ static void ccw_machine_class_init(ObjectClass *oc, void *data)
     mc->no_sdcard = 1;
     mc->max_cpus = S390_MAX_CPUS;
     mc->has_hotpluggable_cpus = true;
+    assert(!mc->get_hotplug_handler);
     mc->get_hotplug_handler = s390_get_hotplug_handler;
     mc->cpu_index_to_instance_props = s390_cpu_index_to_props;
     mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids;
diff --git a/include/hw/arm/arm.h b/include/hw/arm/arm.h
index ce769bde6a..70fa2287e2 100644
--- a/include/hw/arm/arm.h
+++ b/include/hw/arm/arm.h
@@ -39,15 +39,6 @@ DeviceState *armv7m_init(MemoryRegion *system_memory, int mem_size, int num_irq,
  */
 void armv7m_load_kernel(ARMCPU *cpu, const char *kernel_filename, int mem_size);
 
-/*
- * struct used as a parameter of the arm_load_kernel machine init
- * done notifier
- */
-typedef struct {
-    Notifier notifier; /* actual notifier */
-    ARMCPU *cpu; /* handle to the first cpu object */
-} ArmLoadKernelNotifier;
-
 /* arm_boot.c */
 struct arm_boot_info {
     uint64_t ram_size;
@@ -56,6 +47,13 @@ struct arm_boot_info {
     const char *initrd_filename;
     const char *dtb_filename;
     hwaddr loader_start;
+    hwaddr dtb_start;
+    hwaddr dtb_limit;
+    /* If set to true, arm_load_kernel() will not load the DTB itself;
+     * this allows the board to load the DTB manually later.
+     * (default: false)
+     */
+    bool skip_dtb_autoload;
     /* multicore boards that use the default secondary core boot functions
      * need to put the address of the secondary boot code, the boot reg,
      * and the GIC address in the next 3 values, respectively. boards that
@@ -94,8 +92,6 @@ struct arm_boot_info {
      * the user it should implement this hook.
      */
     void (*modify_dtb)(const struct arm_boot_info *info, void *fdt);
-    /* machine init done notifier executing arm_load_dtb */
-    ArmLoadKernelNotifier load_kernel_notifier;
     /* Used internally by arm_boot.c */
     int is_linux;
     hwaddr initrd_start;
@@ -143,6 +139,33 @@ struct arm_boot_info {
  */
 void arm_load_kernel(ARMCPU *cpu, struct arm_boot_info *info);
 
+AddressSpace *arm_boot_address_space(ARMCPU *cpu,
+                                     const struct arm_boot_info *info);
+
+/**
+ * arm_load_dtb() - load a device tree binary image into memory
+ * @addr:       the address to load the image at
+ * @binfo:      struct describing the boot environment
+ * @addr_limit: upper limit of the available memory area at @addr
+ * @as:         address space to load image to
+ *
+ * Load a device tree supplied by the machine or by the user with the
+ * '-dtb' command line option, and put it at offset @addr in target
+ * memory.
+ *
+ * If @addr_limit contains a meaningful value (i.e., it is strictly greater
+ * than @addr), the device tree is only loaded if its size does not exceed
+ * the limit.
+ *
+ * Returns: the size of the device tree image on success,
+ *          0 if the image size exceeds the limit,
+ *          -1 on errors.
+ *
+ * Note: Must not be called unless have_dtb(binfo) is true.
+ */
+int arm_load_dtb(hwaddr addr, const struct arm_boot_info *binfo,
+                 hwaddr addr_limit, AddressSpace *as);
+
 /* Write a secure board setup routine with a dummy handler for SMCs */
 void arm_write_secure_board_setup_dummy_smc(ARMCPU *cpu,
                                             const struct arm_boot_info *info,
diff --git a/include/hw/arm/sysbus-fdt.h b/include/hw/arm/sysbus-fdt.h
index e15bb81807..340c382cdd 100644
--- a/include/hw/arm/sysbus-fdt.h
+++ b/include/hw/arm/sysbus-fdt.h
@@ -24,37 +24,14 @@
 #ifndef HW_ARM_SYSBUS_FDT_H
 #define HW_ARM_SYSBUS_FDT_H
 
-#include "hw/arm/arm.h"
-#include "qemu-common.h"
-#include "hw/sysbus.h"
-
-/*
- * struct that contains dimensioning parameters of the platform bus
- */
-typedef struct {
-    hwaddr platform_bus_base; /* start address of the bus */
-    hwaddr platform_bus_size; /* size of the bus */
-    int platform_bus_first_irq; /* first hwirq assigned to the bus */
-    int platform_bus_num_irqs; /* number of hwirq assigned to the bus */
-} ARMPlatformBusSystemParams;
-
-/*
- * struct that contains all relevant info to build the fdt nodes of
- * platform bus and attached dynamic sysbus devices
- * in the future might be augmented with additional info
- * such as PHY, CLK handles ...
- */
-typedef struct {
-    const ARMPlatformBusSystemParams *system_params;
-    struct arm_boot_info *binfo;
-    const char *intc; /* parent interrupt controller name */
-} ARMPlatformBusFDTParams;
+#include "exec/hwaddr.h"
 
 /**
- * arm_register_platform_bus_fdt_creator - register a machine init done
- * notifier that creates the device tree nodes of the platform bus and
- * associated dynamic sysbus devices
+ * platform_bus_add_all_fdt_nodes - create all the platform bus nodes
+ *
+ * builds the parent platform bus node and all the nodes of dynamic
+ * sysbus devices attached to it.
  */
-void arm_register_platform_bus_fdt_creator(ARMPlatformBusFDTParams *fdt_params);
-
+void platform_bus_add_all_fdt_nodes(void *fdt, const char *intc, hwaddr addr,
+                                    hwaddr bus_size, int irq_start);
 #endif
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index 886372cdbb..4ac7ef6a37 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -99,6 +99,7 @@ typedef struct {
 typedef struct {
     MachineState parent;
     Notifier machine_done;
+    DeviceState *platform_bus_dev;
     FWCfgState *fw_cfg;
     bool secure;
     bool highmem;
diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h
index 2e834e6ded..2a98e3ad68 100644
--- a/include/hw/i386/pc.h
+++ b/include/hw/i386/pc.h
@@ -83,10 +83,6 @@ struct PCMachineState {
 /**
  * PCMachineClass:
  *
- * Methods:
- *
- * @get_hotplug_handler: pointer to parent class callback @get_hotplug_handler
- *
 * Compat fields:
  *
  * @enforce_aligned_dimm: check that DIMM's address/size is aligned by
@@ -106,10 +102,6 @@ struct PCMachineClass {
 
     /*< public >*/
 
-    /* Methods: */
-    HotplugHandler *(*get_hotplug_handler)(MachineState *machine,
-                                           DeviceState *dev);
-
     /* Device configuration: */
     bool pci_enabled;
     bool kvmclock_enabled;
diff --git a/include/hw/platform-bus.h b/include/hw/platform-bus.h
index a00775cba6..19e20c57ce 100644
--- a/include/hw/platform-bus.h
+++ b/include/hw/platform-bus.h
@@ -37,8 +37,6 @@ typedef struct PlatformBusDevice PlatformBusDevice;
 struct PlatformBusDevice {
     /*< private >*/
     SysBusDevice parent_obj;
-    Notifier notifier;
-    bool done_gathering;
 
     /*< public >*/
     uint32_t mmio_size;
@@ -54,4 +52,6 @@ int platform_bus_get_irqn(PlatformBusDevice *platform_bus, SysBusDevice *sbdev,
 hwaddr platform_bus_get_mmio_addr(PlatformBusDevice *pbus, SysBusDevice *sbdev,
                                   int n);
 
+void platform_bus_link_device(PlatformBusDevice *pbus, SysBusDevice *sbdev);
+
 #endif /* HW_PLATFORM_BUS_H */
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index d73c9e14d7..9ed39effd3 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -187,7 +187,7 @@
 /* Returns the eventual value, failed or not */
 #define atomic_cmpxchg__nocheck(ptr, old, new)    ({                    \
     typeof_strip_qual(*ptr) _old = (old);                               \
-    __atomic_compare_exchange_n(ptr, &_old, new, false,                 \
+    (void)__atomic_compare_exchange_n(ptr, &_old, new, false,           \
                                 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);    \
     _old;                                                               \
 })
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 36d52194bc..13bc78d0c8 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -581,6 +581,7 @@ static uint32_t get_elf_hwcap(void)
     GET_FEATURE(ARM_FEATURE_V8_SHA512, ARM_HWCAP_A64_SHA512);
     GET_FEATURE(ARM_FEATURE_V8_FP16,
                 ARM_HWCAP_A64_FPHP | ARM_HWCAP_A64_ASIMDHP);
+    GET_FEATURE(ARM_FEATURE_V8_ATOMICS, ARM_HWCAP_A64_ATOMICS);
     GET_FEATURE(ARM_FEATURE_V8_RDM, ARM_HWCAP_A64_ASIMDRDM);
     GET_FEATURE(ARM_FEATURE_V8_FCMA, ARM_HWCAP_A64_FCMA);
 #undef GET_FEATURE
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 44e6b77151..3b086be570 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -1449,6 +1449,7 @@ enum arm_features {
     ARM_FEATURE_V8_SHA3, /* implements SHA3 part of v8 Crypto Extensions */
     ARM_FEATURE_V8_SM3, /* implements SM3 part of v8 Crypto Extensions */
     ARM_FEATURE_V8_SM4, /* implements SM4 part of v8 Crypto Extensions */
+    ARM_FEATURE_V8_ATOMICS, /* ARMv8.1-Atomics feature */
     ARM_FEATURE_V8_RDM, /* implements v8.1 simd round multiply */
     ARM_FEATURE_V8_FP16, /* implements v8.2 half-precision float */
     ARM_FEATURE_V8_FCMA, /* has complex number part of v8.3 extensions.  */
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index 991d764674..c50dcd4077 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -248,6 +248,7 @@ static void aarch64_max_initfn(Object *obj)
         set_feature(&cpu->env, ARM_FEATURE_V8_SM4);
         set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
         set_feature(&cpu->env, ARM_FEATURE_CRC);
+        set_feature(&cpu->env, ARM_FEATURE_V8_ATOMICS);
         set_feature(&cpu->env, ARM_FEATURE_V8_RDM);
         set_feature(&cpu->env, ARM_FEATURE_V8_FP16);
         set_feature(&cpu->env, ARM_FEATURE_V8_FCMA);
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index afb25ad20c..549ed3513e 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -636,6 +636,49 @@ uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
     return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
 }
 
+/* Writes back the old data into Rs.  */
+void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+                              uint64_t new_lo, uint64_t new_hi)
+{
+    uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+    Int128 oldv, cmpv, newv;
+
+    cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
+    newv = int128_make128(new_lo, new_hi);
+
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
+
+    env->xregs[rs] = int128_getlo(oldv);
+    env->xregs[rs + 1] = int128_gethi(oldv);
+#endif
+}
+
+void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
+                              uint64_t new_hi, uint64_t new_lo)
+{
+    uintptr_t ra = GETPC();
+#ifndef CONFIG_ATOMIC128
+    cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+    Int128 oldv, cmpv, newv;
+
+    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
+    newv = int128_make128(new_lo, new_hi);
+
+    int mem_idx = cpu_mmu_index(env, false);
+    TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
+    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
+
+    env->xregs[rs + 1] = int128_getlo(oldv);
+    env->xregs[rs] = int128_gethi(oldv);
+#endif
+}
+
 /*
  * AdvSIMD half-precision
  */
diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index ef4ddfe9d8..b8028ac98c 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -51,6 +51,8 @@ DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
                    i64, env, i64, i64, i64)
+DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64)
+DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64)
 DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 0fef5d4d06..817f9d81a0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11420,11 +11420,60 @@ VFP_CONV_FIX_A64(sq, s, 32, 64, int64)
 VFP_CONV_FIX(uh, s, 32, 32, uint16)
 VFP_CONV_FIX(ul, s, 32, 32, uint32)
 VFP_CONV_FIX_A64(uq, s, 32, 64, uint64)
-VFP_CONV_FIX_A64(sl, h, 16, 32, int32)
-VFP_CONV_FIX_A64(ul, h, 16, 32, uint32)
+
 #undef VFP_CONV_FIX
 #undef VFP_CONV_FIX_FLOAT
 #undef VFP_CONV_FLOAT_FIX_ROUND
+#undef VFP_CONV_FIX_A64
+
+/* Conversion to/from f16 can overflow to infinity before/after scaling.
+ * Therefore we convert to f64 (which does not round), scale,
+ * and then convert f64 to f16 (which may round).
+ */
+
+static float16 do_postscale_fp16(float64 f, int shift, float_status *fpst)
+{
+    return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst);
+}
+
+float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst)
+{
+    return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst);
+}
+
+float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst)
+{
+    return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst);
+}
+
+static float64 do_prescale_fp16(float16 f, int shift, float_status *fpst)
+{
+    if (unlikely(float16_is_any_nan(f))) {
+        float_raise(float_flag_invalid, fpst);
+        return 0;
+    } else {
+        int old_exc_flags = get_float_exception_flags(fpst);
+        float64 ret;
+
+        ret = float16_to_float64(f, true, fpst);
+        ret = float64_scalbn(ret, shift, fpst);
+        old_exc_flags |= get_float_exception_flags(fpst)
+                         & float_flag_input_denormal;
+        set_float_exception_flags(old_exc_flags, fpst);
+
+        return ret;
+    }
+}
+
+uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst);
+}
+
+uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst)
+{
+    return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst);
+}
 
 /* Set the current fp rounding mode and return the old one.
  * The argument is a softfloat float_round_ value.
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 34e8cc8904..1969b37f2d 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -149,8 +149,8 @@ DEF_HELPER_3(vfp_toshd_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_tosld_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_touhd_round_to_zero, i64, f64, i32, ptr)
 DEF_HELPER_3(vfp_tould_round_to_zero, i64, f64, i32, ptr)
-DEF_HELPER_3(vfp_toulh, i32, f16, i32, ptr)
-DEF_HELPER_3(vfp_toslh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_touhh, i32, f16, i32, ptr)
+DEF_HELPER_3(vfp_toshh, i32, f16, i32, ptr)
 DEF_HELPER_3(vfp_toshs, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_tosls, i32, f32, i32, ptr)
 DEF_HELPER_3(vfp_tosqs, i64, f32, i32, ptr)
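The do_prescale_fp16()/do_postscale_fp16() trick generalizes: the int32 to float64 conversion is exact, and scalbn only adjusts the exponent (exact barring overflow/underflow), so the single rounding step is the final narrowing. A host-arithmetic sketch of the postscale direction, using plain C doubles in place of softfloat types (standard C has no float16, so the last step is left symbolic):

#include <math.h>
#include <stdint.h>

/* Widen, scale, narrow: only the final f64 -> f16 conversion rounds. */
static double fixed_to_f16_prefix(int32_t x, int shift)
{
    double wide = (double)x;     /* exact: float64 represents any int32 */
    wide = ldexp(wide, -shift);  /* exact: only the exponent changes    */
    /* return float64_to_float16(wide, true, fpst);  <- one rounding step */
    return wide;
}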
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 1e7c150514..4d1b220cc6 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
 
 /* Note that the gvec expanders operate on offsets + sizes.  */
 typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2113,6 +2114,103 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
 }
 
+static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
+                                 int rn, int size)
+{
+    TCGv_i64 tcg_rs = cpu_reg(s, rs);
+    TCGv_i64 tcg_rt = cpu_reg(s, rt);
+    int memidx = get_mem_index(s);
+    TCGv_i64 addr = cpu_reg_sp(s, rn);
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
+                               size | MO_ALIGN | s->be_data);
+}
+
+static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
+                                      int rn, int size)
+{
+    TCGv_i64 s1 = cpu_reg(s, rs);
+    TCGv_i64 s2 = cpu_reg(s, rs + 1);
+    TCGv_i64 t1 = cpu_reg(s, rt);
+    TCGv_i64 t2 = cpu_reg(s, rt + 1);
+    TCGv_i64 addr = cpu_reg_sp(s, rn);
+    int memidx = get_mem_index(s);
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+
+    if (size == 2) {
+        TCGv_i64 cmp = tcg_temp_new_i64();
+        TCGv_i64 val = tcg_temp_new_i64();
+
+        if (s->be_data == MO_LE) {
+            tcg_gen_concat32_i64(val, t1, t2);
+            tcg_gen_concat32_i64(cmp, s1, s2);
+        } else {
+            tcg_gen_concat32_i64(val, t2, t1);
+            tcg_gen_concat32_i64(cmp, s2, s1);
+        }
+
+        tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
+                                   MO_64 | MO_ALIGN | s->be_data);
+        tcg_temp_free_i64(val);
+
+        if (s->be_data == MO_LE) {
+            tcg_gen_extr32_i64(s1, s2, cmp);
+        } else {
+            tcg_gen_extr32_i64(s2, s1, cmp);
+        }
+        tcg_temp_free_i64(cmp);
+    } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+        TCGv_i32 tcg_rs = tcg_const_i32(rs);
+
+        if (s->be_data == MO_LE) {
+            gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        } else {
+            gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
+        }
+        tcg_temp_free_i32(tcg_rs);
+    } else {
+        TCGv_i64 d1 = tcg_temp_new_i64();
+        TCGv_i64 d2 = tcg_temp_new_i64();
+        TCGv_i64 a2 = tcg_temp_new_i64();
+        TCGv_i64 c1 = tcg_temp_new_i64();
+        TCGv_i64 c2 = tcg_temp_new_i64();
+        TCGv_i64 zero = tcg_const_i64(0);
+
+        /* Load the two words, in memory order.  */
+        tcg_gen_qemu_ld_i64(d1, addr, memidx,
+                            MO_64 | MO_ALIGN_16 | s->be_data);
+        tcg_gen_addi_i64(a2, addr, 8);
+        tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
+
+        /* Compare the two words, also in memory order.  */
+        tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
+        tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
+        tcg_gen_and_i64(c2, c2, c1);
+
+        /* If compare equal, write back new data, else write back old data.  */
+        tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
+        tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
+        tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
+        tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
+        tcg_temp_free_i64(a2);
+        tcg_temp_free_i64(c1);
+        tcg_temp_free_i64(c2);
+        tcg_temp_free_i64(zero);
+
+        /* Write back the data from memory to Rs.  */
+        tcg_gen_mov_i64(s1, d1);
+        tcg_gen_mov_i64(s2, d2);
+        tcg_temp_free_i64(d1);
+        tcg_temp_free_i64(d2);
+    }
+}
+
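In the size == 2 (32-bit pair) CASP path above, the two registers are packed into one 64-bit value so that the whole pair goes through a single 64-bit atomic cmpxchg. Host-C equivalents of the concat/extract TCG ops used there:

#include <stdint.h>

/* tcg_gen_concat32_i64(ret, lo, hi): lo occupies the low 32 bits. */
static uint64_t concat32(uint32_t lo, uint32_t hi)
{
    return (uint64_t)lo | ((uint64_t)hi << 32);
}

/* tcg_gen_extr32_i64(lo, hi, v): the inverse split. */
static void extr32(uint64_t v, uint32_t *lo, uint32_t *hi)
{
    *lo = (uint32_t)v;
    *hi = (uint32_t)(v >> 32);
}

For big-endian data only the operand order is swapped, which is why the LE and BE arms of the code above use the same primitives.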
 /* Update the Sixty-Four bit (SF) registersize. This logic is derived
  * from the ARMv8 specs for LDR (Shared decode for all encodings).
  */
@@ -2147,62 +2245,114 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
     int rt = extract32(insn, 0, 5);
     int rn = extract32(insn, 5, 5);
     int rt2 = extract32(insn, 10, 5);
-    int is_lasr = extract32(insn, 15, 1);
     int rs = extract32(insn, 16, 5);
-    int is_pair = extract32(insn, 21, 1);
-    int is_store = !extract32(insn, 22, 1);
-    int is_excl = !extract32(insn, 23, 1);
+    int is_lasr = extract32(insn, 15, 1);
+    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
     int size = extract32(insn, 30, 2);
     TCGv_i64 tcg_addr;
 
-    if ((!is_excl && !is_pair && !is_lasr) ||
-        (!is_excl && is_pair) ||
-        (is_pair && size < 2)) {
-        unallocated_encoding(s);
+    switch (o2_L_o1_o0) {
+    case 0x0: /* STXR */
+    case 0x1: /* STLXR */
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
+        }
+        if (is_lasr) {
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+        }
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
         return;
-    }
 
-    if (rn == 31) {
-        gen_check_sp_alignment(s);
-    }
-    tcg_addr = read_cpu_reg_sp(s, rn, 1);
-
-    /* Note that since TCG is single threaded load-acquire/store-release
-     * semantics require no extra if (is_lasr) { ... } handling.
-     */
-
-    if (is_excl) {
-        if (!is_store) {
-            s->is_ldex = true;
-            gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
-            if (is_lasr) {
-                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
-            }
-        } else {
-            if (is_lasr) {
-                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
-            }
-            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
+    case 0x4: /* LDXR */
+    case 0x5: /* LDAXR */
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
         }
-    } else {
-        TCGv_i64 tcg_rt = cpu_reg(s, rt);
-        bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        s->is_ldex = true;
+        gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
+        if (is_lasr) {
+            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+        }
+        return;
 
+    case 0x9: /* STLR */
         /* Generate ISS for non-exclusive accesses including LASR.  */
-        if (is_store) {
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
+        }
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
+                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+        return;
+
+    case 0xd: /* LDAR */
+        /* Generate ISS for non-exclusive accesses including LASR.  */
+        if (rn == 31) {
+            gen_check_sp_alignment(s);
+        }
+        tcg_addr = read_cpu_reg_sp(s, rn, 1);
+        do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
+                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
+        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
+        return;
+
+    case 0x2: case 0x3: /* CASP / STXP */
+        if (size & 2) { /* STXP / STLXP */
+            if (rn == 31) {
+                gen_check_sp_alignment(s);
+            }
             if (is_lasr) {
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
             }
-            do_gpr_st(s, tcg_rt, tcg_addr, size,
-                      true, rt, iss_sf, is_lasr);
-        } else {
-            do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
-                      true, rt, iss_sf, is_lasr);
+            tcg_addr = read_cpu_reg_sp(s, rn, 1);
+            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
+            return;
+        }
+        if (rt2 == 31
+            && ((rt | rs) & 1) == 0
+            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            /* CASP / CASPL */
+            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+            return;
+        }
+        break;
+
+    case 0x6: case 0x7: /* CASPA / LDXP */
+        if (size & 2) { /* LDXP / LDAXP */
+            if (rn == 31) {
+                gen_check_sp_alignment(s);
+            }
+            tcg_addr = read_cpu_reg_sp(s, rn, 1);
+            s->is_ldex = true;
+            gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
             if (is_lasr) {
                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
             }
+            return;
         }
+        if (rt2 == 31
+            && ((rt | rs) & 1) == 0
+            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            /* CASPA / CASPAL */
+            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
+            return;
+        }
+        break;
+
+    case 0xa: /* CAS */
+    case 0xb: /* CASL */
+    case 0xe: /* CASA */
+    case 0xf: /* CASAL */
+        if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
+            gen_compare_and_swap(s, rs, rt, rn, size);
+            return;
+        }
+        break;
     }
+    unallocated_encoding(s);
 }
 
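The rewritten decoder keys on a 4-bit selector assembled from the o2/L/o1 field and the o0 (LASR) bit. A standalone sketch of how it is built and what the values mean (helper names are illustrative; bit positions follow the ARMv8 load/store-exclusive encoding used above):

#include <stdint.h>

static unsigned bits(uint32_t insn, int pos, int len)
{
    return (insn >> pos) & ((1u << len) - 1);
}

/* o2:L:o1 (insn[23:21]) shifted up one, OR'd with o0 (insn[15]):
 * 0x0 STXR, 0x1 STLXR, 0x4 LDXR, 0x5 LDAXR, 0x9 STLR, 0xd LDAR,
 * 0x2/0x3/0x6/0x7 pair or CASP forms, 0xa/0xb/0xe/0xf CAS/CASL/CASA/CASAL. */
static unsigned ldst_excl_selector(uint32_t insn)
{
    return bits(insn, 21, 3) * 2 | bits(insn, 15, 1);
}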
 /*
@@ -2715,6 +2865,88 @@ static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
     }
 }
 
+/* Atomic memory operations
+ *
+ *  31  30      27  26    24    22  21   16   15    12    10    5     0
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn | Rt |
+ * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
+ *
+ * Rt: the result register
+ * Rn: base address or SP
+ * Rs: the source register for the operation
+ * V: vector flag (always 0 as of v8.3)
+ * A: acquire flag
+ * R: release flag
+ */
+static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
+                              int size, int rt, bool is_vector)
+{
+    int rs = extract32(insn, 16, 5);
+    int rn = extract32(insn, 5, 5);
+    int o3_opc = extract32(insn, 12, 4);
+    int feature = ARM_FEATURE_V8_ATOMICS;
+    TCGv_i64 tcg_rn, tcg_rs;
+    AtomicThreeOpFn *fn;
+
+    if (is_vector) {
+        unallocated_encoding(s);
+        return;
+    }
+    switch (o3_opc) {
+    case 000: /* LDADD */
+        fn = tcg_gen_atomic_fetch_add_i64;
+        break;
+    case 001: /* LDCLR */
+        fn = tcg_gen_atomic_fetch_and_i64;
+        break;
+    case 002: /* LDEOR */
+        fn = tcg_gen_atomic_fetch_xor_i64;
+        break;
+    case 003: /* LDSET */
+        fn = tcg_gen_atomic_fetch_or_i64;
+        break;
+    case 004: /* LDSMAX */
+        fn = tcg_gen_atomic_fetch_smax_i64;
+        break;
+    case 005: /* LDSMIN */
+        fn = tcg_gen_atomic_fetch_smin_i64;
+        break;
+    case 006: /* LDUMAX */
+        fn = tcg_gen_atomic_fetch_umax_i64;
+        break;
+    case 007: /* LDUMIN */
+        fn = tcg_gen_atomic_fetch_umin_i64;
+        break;
+    case 010: /* SWP */
+        fn = tcg_gen_atomic_xchg_i64;
+        break;
+    default:
+        unallocated_encoding(s);
+        return;
+    }
+    if (!arm_dc_feature(s, feature)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_rn = cpu_reg_sp(s, rn);
+    tcg_rs = read_cpu_reg(s, rs, true);
+
+    if (o3_opc == 1) { /* LDCLR */
+        tcg_gen_not_i64(tcg_rs, tcg_rs);
+    }
+
+    /* The tcg atomic primitives are all full barriers. Therefore we
+     * can ignore the Acquire and Release bits of this instruction.
+     */
+    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+       s->be_data | size | MO_ALIGN);
+}
+
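One wrinkle worth calling out from the switch above: LDCLR clears the bits that are set in Rs, but TCG only provides fetch_and, so Rs is complemented before the operation. Host-C equivalent of that mapping, using a GCC/Clang builtin (function name illustrative):

#include <stdint.h>

/* LDCLR Rs, Rt, [Rn]  ==  Rt = fetch_and(mem, ~Rs) */
static uint64_t ldclr64(uint64_t *mem, uint64_t rs)
{
    return __atomic_fetch_and(mem, ~rs, __ATOMIC_SEQ_CST);
}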
 /* Load/store register (all forms) */
 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
 {
@@ -2725,23 +2957,28 @@ static void disas_ldst_reg(DisasContext *s, uint32_t insn)
 
     switch (extract32(insn, 24, 2)) {
     case 0:
-        if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
-            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
-        } else {
+        if (extract32(insn, 21, 1) == 0) {
             /* Load/store register (unscaled immediate)
              * Load/store immediate pre/post-indexed
              * Load/store register unprivileged
              */
             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
+            return;
+        }
+        switch (extract32(insn, 10, 2)) {
+        case 0:
+            disas_ldst_atomic(s, insn, size, rt, is_vector);
+            return;
+        case 2:
+            disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
+            return;
         }
         break;
     case 1:
         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
-        break;
-    default:
-        unallocated_encoding(s);
-        break;
+        return;
     }
+    unallocated_encoding(s);
 }
 
 /* AdvSIMD load/store multiple structures
@@ -5444,31 +5681,24 @@ static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
 
     if (itof) {
         TCGv_i64 tcg_rn = cpu_reg(s, rn);
+        TCGv_i64 tmp;
 
         switch (type) {
         case 0:
-        {
             /* 32 bit */
-            TCGv_i64 tmp = tcg_temp_new_i64();
+            tmp = tcg_temp_new_i64();
             tcg_gen_ext32u_i64(tmp, tcg_rn);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
-            tcg_gen_movi_i64(tmp, 0);
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
+            write_fp_dreg(s, rd, tmp);
             tcg_temp_free_i64(tmp);
             break;
-        }
         case 1:
-        {
             /* 64 bit */
-            TCGv_i64 tmp = tcg_const_i64(0);
-            tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
-            tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
-            tcg_temp_free_i64(tmp);
+            write_fp_dreg(s, rd, tcg_rn);
             break;
-        }
         case 2:
             /* 64 bit to top half. */
             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
+            clear_vec_high(s, true, rd);
             break;
         }
     } else {
@@ -6021,15 +6251,18 @@ static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
             tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
             break;
         case 0x0a: /* SMAXV / UMAXV */
-            tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
-                                tcg_res,
-                                tcg_res, tcg_elt, tcg_res, tcg_elt);
+            if (is_u) {
+                tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
+            } else {
+                tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
+            }
             break;
         case 0x1a: /* SMINV / UMINV */
-            tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
-                                tcg_res,
-                                tcg_res, tcg_elt, tcg_res, tcg_elt);
-            break;
+            if (is_u) {
+                tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
+            } else {
+                tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
+            }
+            break;
         default:
             g_assert_not_reached();
         }
128 : 64) - immhb; - if (!extract32(immh, 2, 2)) { + if (immh & 8) { + size = MO_64; + if (!is_scalar && !is_q) { + unallocated_encoding(s); + return; + } + } else if (immh & 4) { + size = MO_32; + } else if (immh & 2) { + size = MO_16; + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + unallocated_encoding(s); + return; + } + } else { + /* immh == 0 would be a failure of the decode logic */ + g_assert(immh == 1); unallocated_encoding(s); return; } @@ -7179,20 +7425,14 @@ static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar, if (is_scalar) { elements = 1; } else { - elements = is_double ? 2 : is_q ? 4 : 2; - if (is_double && !is_q) { - unallocated_encoding(s); - return; - } + elements = (8 << is_q) >> size; } + fracbits = (16 << size) - immhb; if (!fp_access_check(s)) { return; } - /* immh == 0 would be a failure of the decode logic */ - g_assert(immh); - handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size); } @@ -7201,19 +7441,28 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, bool is_q, bool is_u, int immh, int immb, int rn, int rd) { - bool is_double = extract32(immh, 3, 1); int immhb = immh << 3 | immb; - int fracbits = (is_double ? 128 : 64) - immhb; - int pass; + int pass, size, fracbits; TCGv_ptr tcg_fpstatus; TCGv_i32 tcg_rmode, tcg_shift; - if (!extract32(immh, 2, 2)) { - unallocated_encoding(s); - return; - } - - if (!is_scalar && !is_q && is_double) { + if (immh & 0x8) { + size = MO_64; + if (!is_scalar && !is_q) { + unallocated_encoding(s); + return; + } + } else if (immh & 0x4) { + size = MO_32; + } else if (immh & 0x2) { + size = MO_16; + if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) { + unallocated_encoding(s); + return; + } + } else { + /* Should have split out AdvSIMD modified immediate earlier. */ + assert(immh == 1); unallocated_encoding(s); return; } @@ -7225,11 +7474,12 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, assert(!(is_scalar && is_q)); tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO)); - tcg_fpstatus = get_fpstatus_ptr(false); + tcg_fpstatus = get_fpstatus_ptr(size == MO_16); gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus); + fracbits = (16 << size) - immhb; tcg_shift = tcg_const_i32(fracbits); - if (is_double) { + if (size == MO_64) { int maxpass = is_scalar ? 1 : 2; for (pass = 0; pass < maxpass; pass++) { @@ -7246,20 +7496,37 @@ static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar, } clear_vec_high(s, is_q, rd); } else { - int maxpass = is_scalar ? 1 : is_q ? 4 : 2; + void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr); + int maxpass = is_scalar ? 
1 : ((8 << is_q) >> size); + + switch (size) { + case MO_16: + if (is_u) { + fn = gen_helper_vfp_touhh; + } else { + fn = gen_helper_vfp_toshh; + } + break; + case MO_32: + if (is_u) { + fn = gen_helper_vfp_touls; + } else { + fn = gen_helper_vfp_tosls; + } + break; + default: + g_assert_not_reached(); + } + for (pass = 0; pass < maxpass; pass++) { TCGv_i32 tcg_op = tcg_temp_new_i32(); - read_vec_element_i32(s, tcg_op, rn, pass, MO_32); - if (is_u) { - gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); - } else { - gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); - } + read_vec_element_i32(s, tcg_op, rn, pass, size); + fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus); if (is_scalar) { write_fp_sreg(s, rd, tcg_op); } else { - write_vec_element_i32(s, tcg_op, rd, pass, MO_32); + write_vec_element_i32(s, tcg_op, rd, pass, size); } tcg_temp_free_i32(tcg_op); } @@ -9927,27 +10194,6 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) } } -/* Helper functions for 32 bit comparisons */ -static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2); -} - -static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2); -} - -static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2); -} - -static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2) -{ - tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2); -} - /* Pairwise op subgroup of C3.6.16. * * This is called directly or via the handle_3same_float for float pairwise @@ -10047,7 +10293,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 }, { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 }, - { gen_max_s32, gen_max_u32 }, + { tcg_gen_smax_i32, tcg_gen_umax_i32 }, }; genfn = fns[size][u]; break; @@ -10057,7 +10303,7 @@ static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode, static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 }, { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 }, - { gen_min_s32, gen_min_u32 }, + { tcg_gen_smin_i32, tcg_gen_umin_i32 }, }; genfn = fns[size][u]; break; @@ -10512,7 +10758,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_max_s8, gen_helper_neon_max_u8 }, { gen_helper_neon_max_s16, gen_helper_neon_max_u16 }, - { gen_max_s32, gen_max_u32 }, + { tcg_gen_smax_i32, tcg_gen_umax_i32 }, }; genfn = fns[size][u]; break; @@ -10523,7 +10769,7 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) static NeonGenTwoOpFn * const fns[3][2] = { { gen_helper_neon_min_s8, gen_helper_neon_min_u8 }, { gen_helper_neon_min_s16, gen_helper_neon_min_u16 }, - { gen_min_s32, gen_min_u32 }, + { tcg_gen_smin_i32, tcg_gen_umin_i32 }, }; genfn = fns[size][u]; break; diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 1788668c6f..ee2bbc55b0 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -716,7 +716,6 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc, TCGv src1, src2, dat; TCGLabel *l1, *l2; TCGMemOp mop; - TCGCond cond; bool aq, rl; /* Extract the size of the atomic operation. 
*/ @@ -814,60 +813,29 @@ static void gen_atomic(DisasContext *ctx, uint32_t opc, tcg_gen_atomic_fetch_or_tl(src2, src1, src2, ctx->mem_idx, mop); gen_set_gpr(rd, src2); break; - case OPC_RISC_AMOMIN: - cond = TCG_COND_LT; - goto do_minmax; - case OPC_RISC_AMOMAX: - cond = TCG_COND_GT; - goto do_minmax; - case OPC_RISC_AMOMINU: - cond = TCG_COND_LTU; - goto do_minmax; - case OPC_RISC_AMOMAXU: - cond = TCG_COND_GTU; - goto do_minmax; - do_minmax: - /* Handle the RL barrier. The AQ barrier is handled along the - parallel path by the SC atomic cmpxchg. On the serial path, - of course, barriers do not matter. */ - if (rl) { - tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); - } - if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { - l1 = gen_new_label(); - gen_set_label(l1); - } else { - l1 = NULL; - } - gen_get_gpr(src1, rs1); gen_get_gpr(src2, rs2); - if ((mop & MO_SSIZE) == MO_SL) { - /* Sign-extend the register comparison input. */ - tcg_gen_ext32s_tl(src2, src2); - } - dat = tcg_temp_local_new(); - tcg_gen_qemu_ld_tl(dat, src1, ctx->mem_idx, mop); - tcg_gen_movcond_tl(cond, src2, dat, src2, dat, src2); - - if (tb_cflags(ctx->base.tb) & CF_PARALLEL) { - /* Parallel context. Make this operation atomic by verifying - that the memory didn't change while we computed the result. */ - tcg_gen_atomic_cmpxchg_tl(src2, src1, dat, src2, ctx->mem_idx, mop); - - /* If the cmpxchg failed, retry. */ - /* ??? There is an assumption here that this will eventually - succeed, such that we don't live-lock. This is not unlike - a similar loop that the compiler would generate for e.g. - __atomic_fetch_and_xor, so don't worry about it. */ - tcg_gen_brcond_tl(TCG_COND_NE, dat, src2, l1); - } else { - /* Serial context. Directly store the result. */ - tcg_gen_qemu_st_tl(src2, src1, ctx->mem_idx, mop); - } - gen_set_gpr(rd, dat); - tcg_temp_free(dat); + tcg_gen_atomic_fetch_smin_tl(src2, src1, src2, ctx->mem_idx, mop); + gen_set_gpr(rd, src2); + break; + case OPC_RISC_AMOMAX: + gen_get_gpr(src1, rs1); + gen_get_gpr(src2, rs2); + tcg_gen_atomic_fetch_smax_tl(src2, src1, src2, ctx->mem_idx, mop); + gen_set_gpr(rd, src2); + break; + case OPC_RISC_AMOMINU: + gen_get_gpr(src1, rs1); + gen_get_gpr(src2, rs2); + tcg_gen_atomic_fetch_umin_tl(src2, src1, src2, ctx->mem_idx, mop); + gen_set_gpr(rd, src2); + break; + case OPC_RISC_AMOMAXU: + gen_get_gpr(src1, rs1); + gen_get_gpr(src2, rs2); + tcg_gen_atomic_fetch_umax_tl(src2, src1, src2, ctx->mem_idx, mop); + gen_set_gpr(rd, src2); break; default: diff --git a/target/xtensa/translate.c b/target/xtensa/translate.c index aad496347d..ae0feb0254 100644 --- a/target/xtensa/translate.c +++ b/target/xtensa/translate.c @@ -1526,10 +1526,8 @@ static void translate_clamps(DisasContext *dc, const uint32_t arg[], TCGv_i32 tmp1 = tcg_const_i32(-1u << arg[2]); TCGv_i32 tmp2 = tcg_const_i32((1 << arg[2]) - 1); - tcg_gen_movcond_i32(TCG_COND_GT, tmp1, - cpu_R[arg[1]], tmp1, cpu_R[arg[1]], tmp1); - tcg_gen_movcond_i32(TCG_COND_LT, cpu_R[arg[0]], - tmp1, tmp2, tmp1, tmp2); + tcg_gen_smax_i32(tmp1, tmp1, cpu_R[arg[1]]); + tcg_gen_smin_i32(cpu_R[arg[0]], tmp1, tmp2); tcg_temp_free(tmp1); tcg_temp_free(tmp2); } @@ -1854,13 +1852,35 @@ static void translate_memw(DisasContext *dc, const uint32_t arg[], tcg_gen_mb(TCG_BAR_SC | TCG_MO_ALL); } -static void translate_minmax(DisasContext *dc, const uint32_t arg[], - const uint32_t par[]) +static void translate_smin(DisasContext *dc, const uint32_t arg[], + const uint32_t par[]) { if (gen_window_check3(dc, arg[0], arg[1], arg[2])) { - tcg_gen_movcond_i32(par[0], 
cpu_R[arg[0]], - cpu_R[arg[1]], cpu_R[arg[2]], - cpu_R[arg[1]], cpu_R[arg[2]]); + tcg_gen_smin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]); + } +} + +static void translate_umin(DisasContext *dc, const uint32_t arg[], + const uint32_t par[]) +{ + if (gen_window_check3(dc, arg[0], arg[1], arg[2])) { + tcg_gen_umin_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]); + } +} + +static void translate_smax(DisasContext *dc, const uint32_t arg[], + const uint32_t par[]) +{ + if (gen_window_check3(dc, arg[0], arg[1], arg[2])) { + tcg_gen_smax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]); + } +} + +static void translate_umax(DisasContext *dc, const uint32_t arg[], + const uint32_t par[]) +{ + if (gen_window_check3(dc, arg[0], arg[1], arg[2])) { + tcg_gen_umax_i32(cpu_R[arg[0]], cpu_R[arg[1]], cpu_R[arg[2]]); } } @@ -2983,23 +3003,19 @@ static const XtensaOpcodeOps core_ops[] = { .par = (const uint32_t[]){TCG_COND_NE}, }, { .name = "max", - .translate = translate_minmax, - .par = (const uint32_t[]){TCG_COND_GE}, + .translate = translate_smax, }, { .name = "maxu", - .translate = translate_minmax, - .par = (const uint32_t[]){TCG_COND_GEU}, + .translate = translate_umax, }, { .name = "memw", .translate = translate_memw, }, { .name = "min", - .translate = translate_minmax, - .par = (const uint32_t[]){TCG_COND_LT}, + .translate = translate_smin, }, { .name = "minu", - .translate = translate_minmax, - .par = (const uint32_t[]){TCG_COND_LTU}, + .translate = translate_umin, }, { .name = "mov", .translate = translate_mov, diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 34b96d68f3..6a914654f5 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -1033,6 +1033,26 @@ void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg) } } +void tcg_gen_smin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, a, b); +} + +void tcg_gen_umin_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, a, b); +} + +void tcg_gen_smax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_movcond_i32(TCG_COND_LT, ret, a, b, b, a); +} + +void tcg_gen_umax_i32(TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b) +{ + tcg_gen_movcond_i32(TCG_COND_LTU, ret, a, b, b, a); +} + /* 64-bit ops */ #if TCG_TARGET_REG_BITS == 32 @@ -2438,6 +2458,26 @@ void tcg_gen_mulsu2_i64(TCGv_i64 rl, TCGv_i64 rh, TCGv_i64 arg1, TCGv_i64 arg2) tcg_temp_free_i64(t2); } +void tcg_gen_smin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, a, b); +} + +void tcg_gen_umin_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, a, b); +} + +void tcg_gen_smax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_movcond_i64(TCG_COND_LT, ret, a, b, b, a); +} + +void tcg_gen_umax_i64(TCGv_i64 ret, TCGv_i64 a, TCGv_i64 b) +{ + tcg_gen_movcond_i64(TCG_COND_LTU, ret, a, b, b, a); +} + /* Size changing operations. 
*/ void tcg_gen_extrl_i64_i32(TCGv_i32 ret, TCGv_i64 arg) @@ -3011,11 +3051,19 @@ GEN_ATOMIC_HELPER(fetch_add, add, 0) GEN_ATOMIC_HELPER(fetch_and, and, 0) GEN_ATOMIC_HELPER(fetch_or, or, 0) GEN_ATOMIC_HELPER(fetch_xor, xor, 0) +GEN_ATOMIC_HELPER(fetch_smin, smin, 0) +GEN_ATOMIC_HELPER(fetch_umin, umin, 0) +GEN_ATOMIC_HELPER(fetch_smax, smax, 0) +GEN_ATOMIC_HELPER(fetch_umax, umax, 0) GEN_ATOMIC_HELPER(add_fetch, add, 1) GEN_ATOMIC_HELPER(and_fetch, and, 1) GEN_ATOMIC_HELPER(or_fetch, or, 1) GEN_ATOMIC_HELPER(xor_fetch, xor, 1) +GEN_ATOMIC_HELPER(smin_fetch, smin, 1) +GEN_ATOMIC_HELPER(umin_fetch, umin, 1) +GEN_ATOMIC_HELPER(smax_fetch, smax, 1) +GEN_ATOMIC_HELPER(umax_fetch, umax, 1) static void tcg_gen_mov2_i32(TCGv_i32 r, TCGv_i32 a, TCGv_i32 b) { diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 5d2c91a1b6..04eb3e9e17 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -324,6 +324,10 @@ void tcg_gen_ext8u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); +void tcg_gen_smin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_smax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_umin_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); +void tcg_gen_umax_i32(TCGv_i32, TCGv_i32 arg1, TCGv_i32 arg2); static inline void tcg_gen_discard_i32(TCGv_i32 arg) { @@ -517,6 +521,10 @@ void tcg_gen_ext32u_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_bswap16_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_bswap32_i64(TCGv_i64 ret, TCGv_i64 arg); void tcg_gen_bswap64_i64(TCGv_i64 ret, TCGv_i64 arg); +void tcg_gen_smin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_smax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_umin_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); +void tcg_gen_umax_i64(TCGv_i64, TCGv_i64 arg1, TCGv_i64 arg2); #if TCG_TARGET_REG_BITS == 64 static inline void tcg_gen_discard_i64(TCGv_i64 arg) @@ -890,6 +898,7 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGv_i64, void tcg_gen_atomic_xchg_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); void tcg_gen_atomic_xchg_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); + void tcg_gen_atomic_fetch_add_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); void tcg_gen_atomic_fetch_add_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); void tcg_gen_atomic_fetch_and_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); @@ -898,6 +907,15 @@ void tcg_gen_atomic_fetch_or_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); void tcg_gen_atomic_fetch_or_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); void tcg_gen_atomic_fetch_xor_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); void tcg_gen_atomic_fetch_xor_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_smin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_smin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_umin_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_umin_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_smax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_smax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_umax_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_fetch_umax_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); + void tcg_gen_atomic_add_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); void 
tcg_gen_atomic_add_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); void tcg_gen_atomic_and_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); @@ -906,6 +924,14 @@ void tcg_gen_atomic_or_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); void tcg_gen_atomic_or_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); void tcg_gen_atomic_xor_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_smin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_smin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_umin_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_umin_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_smax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_smax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); +void tcg_gen_atomic_umax_fetch_i32(TCGv_i32, TCGv, TCGv_i32, TCGArg, TCGMemOp); +void tcg_gen_atomic_umax_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp); void tcg_gen_mov_vec(TCGv_vec, TCGv_vec); void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec, TCGv_i32); @@ -1025,16 +1051,28 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_mulu2_tl tcg_gen_mulu2_i64 #define tcg_gen_muls2_tl tcg_gen_muls2_i64 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i64 +#define tcg_gen_smin_tl tcg_gen_smin_i64 +#define tcg_gen_umin_tl tcg_gen_umin_i64 +#define tcg_gen_smax_tl tcg_gen_smax_i64 +#define tcg_gen_umax_tl tcg_gen_umax_i64 #define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i64 #define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i64 #define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i64 #define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i64 #define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i64 #define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i64 +#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i64 +#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i64 +#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i64 +#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i64 #define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i64 #define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i64 #define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i64 #define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i64 +#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i64 +#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i64 +#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i64 +#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i64 #define tcg_gen_dup_tl_vec tcg_gen_dup_i64_vec #else #define tcg_gen_movi_tl tcg_gen_movi_i32 @@ -1123,16 +1161,28 @@ void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr base, TCGArg offset, TCGType t); #define tcg_gen_mulu2_tl tcg_gen_mulu2_i32 #define tcg_gen_muls2_tl tcg_gen_muls2_i32 #define tcg_gen_mulsu2_tl tcg_gen_mulsu2_i32 +#define tcg_gen_smin_tl tcg_gen_smin_i32 +#define tcg_gen_umin_tl tcg_gen_umin_i32 +#define tcg_gen_smax_tl tcg_gen_smax_i32 +#define tcg_gen_umax_tl tcg_gen_umax_i32 #define tcg_gen_atomic_cmpxchg_tl tcg_gen_atomic_cmpxchg_i32 #define tcg_gen_atomic_xchg_tl tcg_gen_atomic_xchg_i32 #define tcg_gen_atomic_fetch_add_tl tcg_gen_atomic_fetch_add_i32 #define tcg_gen_atomic_fetch_and_tl tcg_gen_atomic_fetch_and_i32 
#define tcg_gen_atomic_fetch_or_tl tcg_gen_atomic_fetch_or_i32 #define tcg_gen_atomic_fetch_xor_tl tcg_gen_atomic_fetch_xor_i32 +#define tcg_gen_atomic_fetch_smin_tl tcg_gen_atomic_fetch_smin_i32 +#define tcg_gen_atomic_fetch_umin_tl tcg_gen_atomic_fetch_umin_i32 +#define tcg_gen_atomic_fetch_smax_tl tcg_gen_atomic_fetch_smax_i32 +#define tcg_gen_atomic_fetch_umax_tl tcg_gen_atomic_fetch_umax_i32 #define tcg_gen_atomic_add_fetch_tl tcg_gen_atomic_add_fetch_i32 #define tcg_gen_atomic_and_fetch_tl tcg_gen_atomic_and_fetch_i32 #define tcg_gen_atomic_or_fetch_tl tcg_gen_atomic_or_fetch_i32 #define tcg_gen_atomic_xor_fetch_tl tcg_gen_atomic_xor_fetch_i32 +#define tcg_gen_atomic_smin_fetch_tl tcg_gen_atomic_smin_fetch_i32 +#define tcg_gen_atomic_umin_fetch_tl tcg_gen_atomic_umin_fetch_i32 +#define tcg_gen_atomic_smax_fetch_tl tcg_gen_atomic_smax_fetch_i32 +#define tcg_gen_atomic_umax_fetch_tl tcg_gen_atomic_umax_fetch_i32 #define tcg_gen_dup_tl_vec tcg_gen_dup_i32_vec #endif diff --git a/tcg/tcg.h b/tcg/tcg.h index 88378be310..08f8bbfe54 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -1421,12 +1421,20 @@ GEN_ATOMIC_HELPER_ALL(fetch_sub) GEN_ATOMIC_HELPER_ALL(fetch_and) GEN_ATOMIC_HELPER_ALL(fetch_or) GEN_ATOMIC_HELPER_ALL(fetch_xor) +GEN_ATOMIC_HELPER_ALL(fetch_smin) +GEN_ATOMIC_HELPER_ALL(fetch_umin) +GEN_ATOMIC_HELPER_ALL(fetch_smax) +GEN_ATOMIC_HELPER_ALL(fetch_umax) GEN_ATOMIC_HELPER_ALL(add_fetch) GEN_ATOMIC_HELPER_ALL(sub_fetch) GEN_ATOMIC_HELPER_ALL(and_fetch) GEN_ATOMIC_HELPER_ALL(or_fetch) GEN_ATOMIC_HELPER_ALL(xor_fetch) +GEN_ATOMIC_HELPER_ALL(smin_fetch) +GEN_ATOMIC_HELPER_ALL(umin_fetch) +GEN_ATOMIC_HELPER_ALL(smax_fetch) +GEN_ATOMIC_HELPER_ALL(umax_fetch) GEN_ATOMIC_HELPER_ALL(xchg)
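---

A few plain-C sketches follow for the code patterns this series leans on;
they model the intended semantics only and are not code from the tree.

The tcg_gen_{s,u}{min,max}_{i32,i64} expanders added to tcg/tcg-op.c are
thin wrappers around movcond: movcond(LT, ret, a, b, a, b) yields
ret = (a < b) ? a : b, and the max forms keep TCG_COND_LT but swap the
value operands rather than flipping the condition. As ordinary C:

    #include <stdint.h>

    /* Models of the movcond-based expanders; the max variants mirror
     * movcond(LT, ret, a, b, b, a) from tcg/tcg-op.c.  */
    static int32_t  model_smin_i32(int32_t a, int32_t b)   { return a < b ? a : b; }
    static uint32_t model_umin_i32(uint32_t a, uint32_t b) { return a < b ? a : b; }
    static int32_t  model_smax_i32(int32_t a, int32_t b)   { return a < b ? b : a; }
    static uint32_t model_umax_i32(uint32_t a, uint32_t b) { return a < b ? b : a; }

The same shapes replace the open-coded movcond calls in the aarch64
SMAXV/UMAXV/SMINV/UMINV reductions, the Neon pairwise min/max tables, and
the three-same integer min/max tables.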
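In the CASP arms of the exclusive-group decode (cases 0x2/0x3 and
0x6/0x7), the register constraints read compactly: Rt2 must be 31 and
both Rs and Rt must be even before the pair form is accepted. As a
predicate (a restatement of the decode condition, nothing more):

    #include <stdbool.h>

    /* CASP/CASPA/CASPL/CASPAL register validity per the decode above.  */
    static bool model_casp_regs_valid(int rs, int rt, int rt2)
    {
        return rt2 == 31 && ((rt | rs) & 1) == 0;
    }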
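In the disas_ldst_atomic hunk, LDCLR is the one opcode that cannot map
directly onto a TCG atomic: the architecture specifies a bit-clear
(mem = mem AND NOT Rs), while TCG only provides fetch-and, hence the
tcg_gen_not_i64 on the source before calling the fetch_and expander.
A sketch using the GCC/Clang __atomic builtins:

    #include <stdint.h>

    /* Architectural LDCLR on a 64-bit location: returns the old value
     * (written to Rt) after clearing the Rs bits in memory.  */
    static uint64_t model_ldclr_64(uint64_t *mem, uint64_t rs)
    {
        return __atomic_fetch_and(mem, ~rs, __ATOMIC_SEQ_CST);
    }

Because the TCG atomic primitives are full barriers, the A and R bits of
these instructions need no extra fences, as the comment in the hunk notes.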
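The handle_fmov cleanup is unrelated to the atomics but uses the same
"let the helper do it" pattern: write_fp_dreg already stores the 64-bit
value and clears the high part of the vector register, which is exactly
the pair of stores the old code spelled out by hand. A hypothetical
model of the register-file effect (layout assumed, for illustration):

    #include <stdint.h>

    typedef struct { uint64_t d[2]; } VRegModel;   /* assumed 128-bit */

    static void model_write_fp_dreg(VRegModel *v, uint64_t val)
    {
        v->d[0] = val;   /* the old st to fp_reg_offset(s, rd, MO_64) */
        v->d[1] = 0;     /* the old movi/st to fp_reg_hi_offset(s, rd) */
    }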
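In the two handle_simd_shift_*_conv hunks, the is_double flag gives way
to a size derived from the top set bit of immh, so that MO_16 can join
MO_32 and MO_64. The derived quantities reduce to shifts of size alone,
which is what makes the generalisation near-mechanical. A model of the
arithmetic (validity and FP16-feature checks omitted; this assumes the
usual TCGMemOp values MO_16=1, MO_32=2, MO_64=3):

    /* Model of the reworked shift-immediate decode arithmetic.  */
    struct shift_dec { int size, fracbits, elements; };

    static struct shift_dec model_shift_decode(int immh, int immb,
                                               int is_q, int is_scalar)
    {
        struct shift_dec d;
        int immhb = (immh << 3) | immb;

        d.size = (immh & 8) ? 3 : (immh & 4) ? 2 : 1;
        d.fracbits = (16 << d.size) - immhb;            /* 2*esize - immh:immb */
        d.elements = is_scalar ? 1 : (8 << is_q) >> d.size;
        return d;
    }

For immh=0b0100, immb=0, is_q=1 this gives size=MO_32, fracbits=64-32=32
and elements=4, matching the old (is_double ? 128 : 64) - immhb formula
while extending it to half-precision.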
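The RISC-V AMOMIN/AMOMAX/AMOMINU/AMOMAXU cases likewise collapse onto the
new tcg_gen_atomic_fetch_{s,u}{min,max}_tl expanders, retiring the manual
load/movcond/cmpxchg retry loop. The semantics each AMO must provide,
written as a C compare-and-swap loop (a sketch of the architectural
behaviour, not of the helper QEMU emits):

    #include <stdbool.h>
    #include <stdint.h>

    /* AMOMIN.W: atomically mem = min(mem, src); rd gets the old value.  */
    static int32_t model_amomin_w(int32_t *mem, int32_t src)
    {
        int32_t old = __atomic_load_n(mem, __ATOMIC_RELAXED);
        int32_t next;
        do {
            next = old < src ? old : src;
        } while (!__atomic_compare_exchange_n(mem, &old, next, false,
                                              __ATOMIC_SEQ_CST,
                                              __ATOMIC_RELAXED));
        return old;
    }

The unsigned and max forms differ only in the comparison; the .D forms
only in the operand width.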
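The xtensa CLAMPS rewrite is the same idea applied twice: clamping x to
the signed range [-2^imm, 2^imm - 1] is an smax against the lower bound
followed by an smin against the upper bound, exactly as translate_clamps
now emits. In C (treating the immediate range as whatever the ISA allows):

    #include <stdint.h>

    static int32_t model_clamps(int32_t x, unsigned imm)
    {
        int32_t lo = (int32_t)(-1u << imm);        /* -2^imm       */
        int32_t hi = (int32_t)((1u << imm) - 1);   /*  2^imm - 1   */
        int32_t t = x < lo ? lo : x;               /* smax(lo, x)  */
        return t < hi ? t : hi;                    /* smin(t, hi)  */
    }

The standalone MIN/MAX/MINU/MAXU translators drop their TCG_COND_* par
tables for the same reason: each opcode now calls one expander directly.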