diff --git a/configs/targets/riscv64-softmmu.mak b/configs/targets/riscv64-softmmu.mak index 917980e63e..6c5de72e03 100644 --- a/configs/targets/riscv64-softmmu.mak +++ b/configs/targets/riscv64-softmmu.mak @@ -2,6 +2,6 @@ TARGET_ARCH=riscv64 TARGET_BASE_ARCH=riscv TARGET_SUPPORTS_MTTCG=y TARGET_KVM_HAVE_GUEST_DEBUG=y -TARGET_XML_FILES= gdb-xml/riscv-64bit-cpu.xml gdb-xml/riscv-32bit-fpu.xml gdb-xml/riscv-64bit-fpu.xml gdb-xml/riscv-64bit-virtual.xml +TARGET_XML_FILES= gdb-xml/riscv-64bit-cpu.xml gdb-xml/riscv-32bit-fpu.xml gdb-xml/riscv-64bit-fpu.xml gdb-xml/riscv-64bit-virtual.xml gdb-xml/riscv-32bit-cpu.xml gdb-xml/riscv-32bit-virtual.xml # needed by boot.c TARGET_NEED_FDT=y diff --git a/disas/riscv.c b/disas/riscv.c index fc0331b90b..9c1e332dde 100644 --- a/disas/riscv.c +++ b/disas/riscv.c @@ -976,6 +976,14 @@ typedef enum { rv_op_amocas_h = 945, rv_op_wrs_sto = 946, rv_op_wrs_nto = 947, + rv_op_lpad = 948, + rv_op_sspush = 949, + rv_op_sspopchk = 950, + rv_op_ssrdp = 951, + rv_op_ssamoswap_w = 952, + rv_op_ssamoswap_d = 953, + rv_op_c_sspush = 954, + rv_op_c_sspopchk = 955, } rv_op; /* register names */ @@ -2236,6 +2244,16 @@ const rv_opcode_data rvi_opcode_data[] = { { "amocas.h", rv_codec_r_a, rv_fmt_aqrl_rd_rs2_rs1, NULL, 0, 0, 0 }, { "wrs.sto", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 }, { "wrs.nto", rv_codec_none, rv_fmt_none, NULL, 0, 0, 0 }, + { "lpad", rv_codec_lp, rv_fmt_imm, NULL, 0, 0, 0 }, + { "sspush", rv_codec_r, rv_fmt_rs2, NULL, 0, 0, 0 }, + { "sspopchk", rv_codec_r, rv_fmt_rs1, NULL, 0, 0, 0 }, + { "ssrdp", rv_codec_r, rv_fmt_rd, NULL, 0, 0, 0 }, + { "ssamoswap.w", rv_codec_r_a, rv_fmt_aqrl_rd_rs2_rs1, NULL, 0, 0, 0 }, + { "ssamoswap.d", rv_codec_r_a, rv_fmt_aqrl_rd_rs2_rs1, NULL, 0, 0, 0 }, + { "c.sspush", rv_codec_cmop_ss, rv_fmt_rs2, NULL, rv_op_sspush, + rv_op_sspush, 0 }, + { "c.sspopchk", rv_codec_cmop_ss, rv_fmt_rs1, NULL, rv_op_sspopchk, + rv_op_sspopchk, 0 }, }; /* CSR names */ @@ -2253,6 +2271,7 @@ static const char *csr_name(int csrno) case 0x0009: return "vxsat"; case 0x000a: return "vxrm"; case 0x000f: return "vcsr"; + case 0x0011: return "ssp"; case 0x0015: return "seed"; case 0x0017: return "jvt"; case 0x0040: return "uscratch"; @@ -2595,7 +2614,13 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa) if (dec->cfg->ext_zcmop) { if ((((inst >> 2) & 0b111111) == 0b100000) && (((inst >> 11) & 0b11) == 0b0)) { - op = rv_c_mop_1 + ((inst >> 8) & 0b111); + unsigned int cmop_code = 0; + cmop_code = ((inst >> 8) & 0b111); + op = rv_c_mop_1 + cmop_code; + if (dec->cfg->ext_zicfiss) { + op = (cmop_code == 0) ? rv_op_c_sspush : op; + op = (cmop_code == 2) ? 
rv_op_c_sspopchk : op; + } break; } } @@ -2929,7 +2954,13 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa) case 7: op = rv_op_andi; break; } break; - case 5: op = rv_op_auipc; break; + case 5: + op = rv_op_auipc; + if (dec->cfg->ext_zicfilp && + (((inst >> 7) & 0b11111) == 0b00000)) { + op = rv_op_lpad; + } + break; case 6: switch ((inst >> 12) & 0b111) { case 0: op = rv_op_addiw; break; @@ -3073,6 +3104,8 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa) case 66: op = rv_op_amoor_w; break; case 67: op = rv_op_amoor_d; break; case 68: op = rv_op_amoor_q; break; + case 74: op = rv_op_ssamoswap_w; break; + case 75: op = rv_op_ssamoswap_d; break; case 96: op = rv_op_amoand_b; break; case 97: op = rv_op_amoand_h; break; case 98: op = rv_op_amoand_w; break; @@ -4026,7 +4059,7 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa) case 3: op = rv_op_csrrc; break; case 4: if (dec->cfg->ext_zimop) { - int imm_mop5, imm_mop3; + int imm_mop5, imm_mop3, reg_num; if ((extract32(inst, 22, 10) & 0b1011001111) == 0b1000000111) { imm_mop5 = deposit32(deposit32(extract32(inst, 20, 2), @@ -4034,11 +4067,36 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa) extract32(inst, 26, 2)), 4, 1, extract32(inst, 30, 1)); op = rv_mop_r_0 + imm_mop5; + /* if zicfiss enabled and mop5 is shadow stack */ + if (dec->cfg->ext_zicfiss && + ((imm_mop5 & 0b11100) == 0b11100)) { + /* rs1=0 means ssrdp */ + if ((inst & (0b011111 << 15)) == 0) { + op = rv_op_ssrdp; + } + /* rd=0 means sspopchk */ + reg_num = (inst >> 15) & 0b011111; + if (((inst & (0b011111 << 7)) == 0) && + ((reg_num == 1) || (reg_num == 5))) { + op = rv_op_sspopchk; + } + } } else if ((extract32(inst, 25, 7) & 0b1011001) == 0b1000001) { imm_mop3 = deposit32(extract32(inst, 26, 2), 2, 1, extract32(inst, 30, 1)); op = rv_mop_rr_0 + imm_mop3; + /* if zicfiss enabled and mop3 is shadow stack */ + if (dec->cfg->ext_zicfiss && + ((imm_mop3 & 0b111) == 0b111)) { + /* rs1=0 and rd=0 means sspush */ + reg_num = (inst >> 20) & 0b011111; + if (((inst & (0b011111 << 15)) == 0) && + ((inst & (0b011111 << 7)) == 0) && + ((reg_num == 1) || (reg_num == 5))) { + op = rv_op_sspush; + } + } } } break; @@ -4488,6 +4546,11 @@ static uint32_t operand_tbl_index(rv_inst inst) return ((inst << 54) >> 56); } +static uint32_t operand_lpl(rv_inst inst) +{ + return inst >> 12; +} + /* decode operands */ static void decode_inst_operands(rv_decode *dec, rv_isa isa) @@ -4875,6 +4938,14 @@ static void decode_inst_operands(rv_decode *dec, rv_isa isa) dec->imm = sextract32(operand_rs2(inst), 0, 5); dec->imm1 = operand_imm2(inst); break; + case rv_codec_lp: + dec->imm = operand_lpl(inst); + break; + case rv_codec_cmop_ss: + dec->rd = rv_ireg_zero; + dec->rs1 = dec->rs2 = operand_crs1(inst); + dec->imm = 0; + break; }; } diff --git a/disas/riscv.h b/disas/riscv.h index 0d1f89ce8a..d211700cb2 100644 --- a/disas/riscv.h +++ b/disas/riscv.h @@ -166,6 +166,8 @@ typedef enum { rv_codec_r2_immhl, rv_codec_r2_imm2_imm5, rv_codec_fli, + rv_codec_lp, + rv_codec_cmop_ss, } rv_codec; /* structures */ @@ -223,11 +225,13 @@ enum { #define rv_fmt_none "O\t" #define rv_fmt_rs1 "O\t1" +#define rv_fmt_rs2 "O\t2" #define rv_fmt_offset "O\to" #define rv_fmt_pred_succ "O\tp,s" #define rv_fmt_rs1_rs2 "O\t1,2" #define rv_fmt_rd_imm "O\t0,i" #define rv_fmt_rd_uimm "O\t0,Ui" +#define rv_fmt_imm "O\ti" #define rv_fmt_rd_offset "O\t0,o" #define rv_fmt_rd_uoffset "O\t0,Uo" #define rv_fmt_rd_rs1_rs2 "O\t0,1,2" diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 
6495ed5ed9..ff5a1f03da 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -36,3 +36,4 @@ guest hardware that is specific to QEMU. vmgenid rapl-msr rocker + riscv-iommu diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst index 328ab31fe8..261b0f359f 100644 --- a/docs/specs/pci-ids.rst +++ b/docs/specs/pci-ids.rst @@ -98,6 +98,8 @@ PCI devices (other than virtio): PCI ACPI ERST device (``-device acpi-erst``) 1b36:0013 PCI UFS device (``-device ufs``) +1b36:0014 + PCI RISC-V IOMMU device All these devices are documented in :doc:`index`. diff --git a/docs/specs/riscv-iommu.rst b/docs/specs/riscv-iommu.rst new file mode 100644 index 0000000000..463f4cffb6 --- /dev/null +++ b/docs/specs/riscv-iommu.rst @@ -0,0 +1,90 @@ +.. _riscv-iommu: + +RISC-V IOMMU support for RISC-V machines +======================================== + +QEMU implements a RISC-V IOMMU emulation based on the RISC-V IOMMU spec +version 1.0 `iommu1.0`_. + +The emulation includes a PCI reference device, riscv-iommu-pci, that QEMU +RISC-V boards can use. The 'virt' RISC-V machine is compatible with this +device. + +riscv-iommu-pci reference device +-------------------------------- + +This device implements the RISC-V IOMMU emulation as recommended by the section +"Integrating an IOMMU as a PCIe device" of `iommu1.0`_: a PCI device with base +class 08h, sub-class 06h and programming interface 00h. + +As a reference device it doesn't implement anything outside of the specification, +so it uses a generic default PCI ID given by QEMU: 1b36:0014. + +To include the device in the 'virt' machine: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -device riscv-iommu-pci,[optional_pci_opts] (...) + +This will add a RISC-V IOMMU PCI device in the board following any additional +PCI parameters (like PCI bus address). The behavior of the RISC-V IOMMU is +defined by the spec but its operation is OS dependent. + +As of this writing the existing Linux kernel support `linux-v8`_, not yet merged, +does not have support for features like VFIO passthrough. The IOMMU emulation +was tested using a public Ventana Micro Systems kernel repository in +`ventana-linux`_. This kernel is based on `linux-v8`_ with additional patches that +enable features like KVM VFIO passthrough with irqbypass. Until the kernel support +is feature complete feel free to use the kernel available in the Ventana Micro Systems +mirror. + +The current Linux kernel support will use the IOMMU device to create IOMMU groups +with any eligible cards available in the system, regardless of factors such as the +order in which the devices are added in the command line. + +This means that these command lines are equivalent as far as the current +IOMMU kernel driver behaves: + +.. code-block:: bash + + $ qemu-system-riscv64 \ + -M virt,aia=aplic-imsic,aia-guests=5 \ + -device riscv-iommu-pci,addr=1.0,vendor-id=0x1efd,device-id=0xedf1 \ + -device e1000e,netdev=net1 -netdev user,id=net1,net=192.168.0.0/24 \ + -device e1000e,netdev=net2 -netdev user,id=net2,net=192.168.200.0/24 \ + (...) + + $ qemu-system-riscv64 \ + -M virt,aia=aplic-imsic,aia-guests=5 \ + -device e1000e,netdev=net1 -netdev user,id=net1,net=192.168.0.0/24 \ + -device e1000e,netdev=net2 -netdev user,id=net2,net=192.168.200.0/24 \ + -device riscv-iommu-pci,addr=1.0,vendor-id=0x1efd,device-id=0xedf1 \ + (...) + +Both will create iommu groups for the two e1000e cards. 
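+
+A quick way to confirm the groups from inside the guest is the generic Linux
+IOMMU sysfs interface (standard kernel behaviour, not specific to the RISC-V
+IOMMU driver). The group numbers and device addresses below are only
+illustrative and will vary with the PCI topology:
+
+.. code-block:: bash
+
+    # each directory is an IOMMU group created by the kernel
+    $ ls /sys/kernel/iommu_groups/
+    0  1
+    # list the devices attached to a given group
+    $ ls /sys/kernel/iommu_groups/0/devices/
+    0000:00:02.0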
+ +Another thing to notice on `linux-v8`_ and `ventana-linux`_ is that the kernel driver +considers an IOMMU identified as a Rivos device, i.e. it uses Rivos vendor ID. To +use the riscv-iommu-pci device with the existing kernel support we need to emulate +a Rivos PCI IOMMU by setting 'vendor-id' and 'device-id': + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt \ + -device riscv-iommu-pci,vendor-id=0x1efd,device-id=0xedf1 (...) + +Several options are available to control the capabilities of the device, namely: + +- "bus": the bus that the IOMMU device uses +- "ioatc-limit": size of the Address Translation Cache (default to 2Mb) +- "intremap": enable/disable MSI support +- "ats": enable ATS support +- "off" (Out-of-reset translation mode: 'on' for DMA disabled, 'off' for 'BARE' (passthrough)) +- "s-stage": enable s-stage support +- "g-stage": enable g-stage support + +.. _iommu1.0: https://github.com/riscv-non-isa/riscv-iommu/releases/download/v1.0/riscv-iommu.pdf + +.. _linux-v8: https://lore.kernel.org/linux-riscv/cover.1718388908.git.tjeznach@rivosinc.com/ + +.. _ventana-linux: https://github.com/ventanamicro/linux/tree/dev-upstream diff --git a/docs/system/riscv/virt.rst b/docs/system/riscv/virt.rst index 9a06f95a34..8e9a2e4dda 100644 --- a/docs/system/riscv/virt.rst +++ b/docs/system/riscv/virt.rst @@ -84,6 +84,19 @@ none``, as in Firmware images used for pflash must be exactly 32 MiB in size. +riscv-iommu support +------------------- + +The board has support for the riscv-iommu-pci device by using the following +command line: + +.. code-block:: bash + + $ qemu-system-riscv64 -M virt -device riscv-iommu-pci (...) + +Refer to :ref:`riscv-iommu` for more information on how the RISC-V IOMMU support +works. + Machine-specific options ------------------------ diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index 54fd55c3e6..0345088e8b 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -217,7 +217,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) { uint8_t ch; cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1); - qemu_chr_fe_write(&s->chr, &ch, 1); + /* + * XXX this blocks entire thread. Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); resp = 0x100 | (uint8_t)payload; } else { qemu_log_mask(LOG_UNIMP, @@ -236,7 +240,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) return; } else if (cmd == HTIF_CONSOLE_CMD_PUTC) { uint8_t ch = (uint8_t)payload; - qemu_chr_fe_write(&s->chr, &ch, 1); + /* + * XXX this blocks entire thread. 
Rewrite to use + * qemu_chr_fe_write and background I/O callbacks + */ + qemu_chr_fe_write_all(&s->chr, &ch, 1); resp = 0x100 | (uint8_t)payload; } else { qemu_log("HTIF device %d: unknown command\n", device); diff --git a/hw/char/sifive_uart.c b/hw/char/sifive_uart.c index 7fc6787f69..aeb45d3601 100644 --- a/hw/char/sifive_uart.c +++ b/hw/char/sifive_uart.c @@ -26,6 +26,8 @@ #include "hw/char/sifive_uart.h" #include "hw/qdev-properties-system.h" +#define TX_INTERRUPT_TRIGGER_DELAY_NS 100 + /* * Not yet implemented: * @@ -64,6 +66,72 @@ static void sifive_uart_update_irq(SiFiveUARTState *s) } } +static gboolean sifive_uart_xmit(void *do_not_use, GIOCondition cond, + void *opaque) +{ + SiFiveUARTState *s = opaque; + int ret; + const uint8_t *characters; + uint32_t numptr = 0; + + /* instant drain the fifo when there's no back-end */ + if (!qemu_chr_fe_backend_connected(&s->chr)) { + fifo8_reset(&s->tx_fifo); + return G_SOURCE_REMOVE; + } + + if (fifo8_is_empty(&s->tx_fifo)) { + return G_SOURCE_REMOVE; + } + + /* Don't pop the FIFO in case the write fails */ + characters = fifo8_peek_bufptr(&s->tx_fifo, + fifo8_num_used(&s->tx_fifo), &numptr); + ret = qemu_chr_fe_write(&s->chr, characters, numptr); + + if (ret >= 0) { + /* We wrote the data, actually pop the fifo */ + fifo8_pop_bufptr(&s->tx_fifo, ret, NULL); + } + + if (!fifo8_is_empty(&s->tx_fifo)) { + guint r = qemu_chr_fe_add_watch(&s->chr, G_IO_OUT | G_IO_HUP, + sifive_uart_xmit, s); + if (!r) { + fifo8_reset(&s->tx_fifo); + return G_SOURCE_REMOVE; + } + } + + /* Clear the TX Full bit */ + if (!fifo8_is_full(&s->tx_fifo)) { + s->txfifo &= ~SIFIVE_UART_TXFIFO_FULL; + } + + sifive_uart_update_irq(s); + return G_SOURCE_REMOVE; +} + +static void sifive_uart_write_tx_fifo(SiFiveUARTState *s, const uint8_t *buf, + int size) +{ + uint64_t current_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + + if (size > fifo8_num_free(&s->tx_fifo)) { + size = fifo8_num_free(&s->tx_fifo); + qemu_log_mask(LOG_GUEST_ERROR, "sifive_uart: TX FIFO overflow"); + } + + fifo8_push_all(&s->tx_fifo, buf, size); + + if (fifo8_is_full(&s->tx_fifo)) { + s->txfifo |= SIFIVE_UART_TXFIFO_FULL; + } + + timer_mod(s->fifo_trigger_handle, current_time + + TX_INTERRUPT_TRIGGER_DELAY_NS); +} + static uint64_t sifive_uart_read(void *opaque, hwaddr addr, unsigned int size) { @@ -82,7 +150,7 @@ sifive_uart_read(void *opaque, hwaddr addr, unsigned int size) return 0x80000000; case SIFIVE_UART_TXFIFO: - return 0; /* Should check tx fifo */ + return s->txfifo; case SIFIVE_UART_IE: return s->ie; case SIFIVE_UART_IP: @@ -106,12 +174,10 @@ sifive_uart_write(void *opaque, hwaddr addr, { SiFiveUARTState *s = opaque; uint32_t value = val64; - unsigned char ch = value; switch (addr) { case SIFIVE_UART_TXFIFO: - qemu_chr_fe_write(&s->chr, &ch, 1); - sifive_uart_update_irq(s); + sifive_uart_write_tx_fifo(s, (uint8_t *) &value, 1); return; case SIFIVE_UART_IE: s->ie = val64; @@ -131,6 +197,13 @@ sifive_uart_write(void *opaque, hwaddr addr, __func__, (int)addr, (int)value); } +static void fifo_trigger_update(void *opaque) +{ + SiFiveUARTState *s = opaque; + + sifive_uart_xmit(NULL, G_IO_OUT, s); +} + static const MemoryRegionOps sifive_uart_ops = { .read = sifive_uart_read, .write = sifive_uart_write, @@ -197,6 +270,9 @@ static void sifive_uart_realize(DeviceState *dev, Error **errp) { SiFiveUARTState *s = SIFIVE_UART(dev); + s->fifo_trigger_handle = timer_new_ns(QEMU_CLOCK_VIRTUAL, + fifo_trigger_update, s); + qemu_chr_fe_set_handlers(&s->chr, sifive_uart_can_rx, sifive_uart_rx, 
sifive_uart_event, sifive_uart_be_change, s, NULL, true); @@ -206,12 +282,18 @@ static void sifive_uart_realize(DeviceState *dev, Error **errp) static void sifive_uart_reset_enter(Object *obj, ResetType type) { SiFiveUARTState *s = SIFIVE_UART(obj); + + s->txfifo = 0; s->ie = 0; s->ip = 0; s->txctrl = 0; s->rxctrl = 0; s->div = 0; + s->rx_fifo_len = 0; + + memset(s->rx_fifo, 0, SIFIVE_UART_RX_FIFO_SIZE); + fifo8_create(&s->tx_fifo, SIFIVE_UART_TX_FIFO_SIZE); } static void sifive_uart_reset_hold(Object *obj, ResetType type) @@ -222,8 +304,8 @@ static void sifive_uart_reset_hold(Object *obj, ResetType type) static const VMStateDescription vmstate_sifive_uart = { .name = TYPE_SIFIVE_UART, - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (const VMStateField[]) { VMSTATE_UINT8_ARRAY(rx_fifo, SiFiveUARTState, SIFIVE_UART_RX_FIFO_SIZE), @@ -233,6 +315,9 @@ static const VMStateDescription vmstate_sifive_uart = { VMSTATE_UINT32(txctrl, SiFiveUARTState), VMSTATE_UINT32(rxctrl, SiFiveUARTState), VMSTATE_UINT32(div, SiFiveUARTState), + VMSTATE_UINT32(txfifo, SiFiveUARTState), + VMSTATE_FIFO8(tx_fifo, SiFiveUARTState), + VMSTATE_TIMER_PTR(fifo_trigger_handle, SiFiveUARTState), VMSTATE_END_OF_LIST() }, }; diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 32edd6d07b..4a262c82f0 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -159,31 +159,42 @@ static bool is_kvm_aia(bool msimode) return kvm_irqchip_in_kernel() && msimode; } +static bool riscv_aplic_irq_rectified_val(RISCVAPLICState *aplic, + uint32_t irq) +{ + uint32_t sourcecfg, sm, raw_input, irq_inverted; + + if (!irq || aplic->num_irqs <= irq) { + return false; + } + + sourcecfg = aplic->sourcecfg[irq]; + if (sourcecfg & APLIC_SOURCECFG_D) { + return false; + } + + sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) { + return false; + } + + raw_input = (aplic->state[irq] & APLIC_ISTATE_INPUT) ? 1 : 0; + irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || + sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0; + + return !!(raw_input ^ irq_inverted); +} + static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic, uint32_t word) { - uint32_t i, irq, sourcecfg, sm, raw_input, irq_inverted, ret = 0; + uint32_t i, irq, rectified_val, ret = 0; for (i = 0; i < 32; i++) { irq = word * 32 + i; - if (!irq || aplic->num_irqs <= irq) { - continue; - } - sourcecfg = aplic->sourcecfg[irq]; - if (sourcecfg & APLIC_SOURCECFG_D) { - continue; - } - - sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; - if (sm == APLIC_SOURCECFG_SM_INACTIVE) { - continue; - } - - raw_input = (aplic->state[irq] & APLIC_ISTATE_INPUT) ? 1 : 0; - irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || - sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 
1 : 0; - ret |= (raw_input ^ irq_inverted) << i; + rectified_val = riscv_aplic_irq_rectified_val(aplic, irq); + ret |= rectified_val << i; } return ret; @@ -702,6 +713,10 @@ static void riscv_aplic_write(void *opaque, hwaddr addr, uint64_t value, (aplic->sourcecfg[irq] == 0)) { riscv_aplic_set_pending_raw(aplic, irq, false); riscv_aplic_set_enabled_raw(aplic, irq, false); + } else { + if (riscv_aplic_irq_rectified_val(aplic, irq)) { + riscv_aplic_set_pending_raw(aplic, irq, true); + } } } else if (aplic->mmode && aplic->msimode && (addr == APLIC_MMSICFGADDR)) { diff --git a/hw/intc/sifive_plic.c b/hw/intc/sifive_plic.c index 7f43e96310..ed74490dba 100644 --- a/hw/intc/sifive_plic.c +++ b/hw/intc/sifive_plic.c @@ -189,8 +189,13 @@ static void sifive_plic_write(void *opaque, hwaddr addr, uint64_t value, if (addr_between(addr, plic->priority_base, plic->num_sources << 2)) { uint32_t irq = (addr - plic->priority_base) >> 2; - - if (((plic->num_priorities + 1) & plic->num_priorities) == 0) { + if (irq == 0) { + /* IRQ 0 source prioority is reserved */ + qemu_log_mask(LOG_GUEST_ERROR, + "%s: Invalid source priority write 0x%" + HWADDR_PRIx "\n", __func__, addr); + return; + } else if (((plic->num_priorities + 1) & plic->num_priorities) == 0) { /* * if "num_priorities + 1" is power-of-2, make each register bit of * interrupt priority WARL (Write-Any-Read-Legal). Just filter @@ -349,8 +354,10 @@ static void sifive_plic_irq_request(void *opaque, int irq, int level) { SiFivePLICState *s = opaque; - sifive_plic_set_pending(s, irq, level > 0); - sifive_plic_update(s); + if (level > 0) { + sifive_plic_set_pending(s, irq, true); + sifive_plic_update(s); + } } static void sifive_plic_realize(DeviceState *dev, Error **errp) diff --git a/hw/riscv/Kconfig b/hw/riscv/Kconfig index 44695ff9f2..2e88467c4a 100644 --- a/hw/riscv/Kconfig +++ b/hw/riscv/Kconfig @@ -1,3 +1,6 @@ +config RISCV_IOMMU + bool + config RISCV_NUMA bool @@ -47,6 +50,7 @@ config RISCV_VIRT select SERIAL_MM select RISCV_ACLINT select RISCV_APLIC + select RISCV_IOMMU select RISCV_IMSIC select SIFIVE_PLIC select SIFIVE_TEST diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c index 9115ecd91f..2e319168db 100644 --- a/hw/riscv/boot.c +++ b/hw/riscv/boot.c @@ -343,27 +343,33 @@ void riscv_load_fdt(hwaddr fdt_addr, void *fdt) rom_ptr_for_as(&address_space_memory, fdt_addr, fdtsize)); } -void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base, - hwaddr rom_size, uint32_t reset_vec_size, +void riscv_rom_copy_firmware_info(MachineState *machine, + RISCVHartArrayState *harts, + hwaddr rom_base, hwaddr rom_size, + uint32_t reset_vec_size, uint64_t kernel_entry) { + struct fw_dynamic_info32 dinfo32; struct fw_dynamic_info dinfo; size_t dinfo_len; - if (sizeof(dinfo.magic) == 4) { - dinfo.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE); - dinfo.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION); - dinfo.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S); - dinfo.next_addr = cpu_to_le32(kernel_entry); + if (riscv_is_32bit(harts)) { + dinfo32.magic = cpu_to_le32(FW_DYNAMIC_INFO_MAGIC_VALUE); + dinfo32.version = cpu_to_le32(FW_DYNAMIC_INFO_VERSION); + dinfo32.next_mode = cpu_to_le32(FW_DYNAMIC_INFO_NEXT_MODE_S); + dinfo32.next_addr = cpu_to_le32(kernel_entry); + dinfo32.options = 0; + dinfo32.boot_hart = 0; + dinfo_len = sizeof(dinfo32); } else { dinfo.magic = cpu_to_le64(FW_DYNAMIC_INFO_MAGIC_VALUE); dinfo.version = cpu_to_le64(FW_DYNAMIC_INFO_VERSION); dinfo.next_mode = cpu_to_le64(FW_DYNAMIC_INFO_NEXT_MODE_S); dinfo.next_addr = 
cpu_to_le64(kernel_entry); + dinfo.options = 0; + dinfo.boot_hart = 0; + dinfo_len = sizeof(dinfo); } - dinfo.options = 0; - dinfo.boot_hart = 0; - dinfo_len = sizeof(dinfo); /** * copy the dynamic firmware info. This information is specific to @@ -375,7 +381,10 @@ void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base, exit(1); } - rom_add_blob_fixed_as("mrom.finfo", &dinfo, dinfo_len, + rom_add_blob_fixed_as("mrom.finfo", + riscv_is_32bit(harts) ? + (void *)&dinfo32 : (void *)&dinfo, + dinfo_len, rom_base + reset_vec_size, &address_space_memory); } @@ -431,7 +440,9 @@ void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts } rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec), rom_base, &address_space_memory); - riscv_rom_copy_firmware_info(machine, rom_base, rom_size, sizeof(reset_vec), + riscv_rom_copy_firmware_info(machine, harts, + rom_base, rom_size, + sizeof(reset_vec), kernel_entry); } diff --git a/hw/riscv/meson.build b/hw/riscv/meson.build index f872674093..adbef8a9b2 100644 --- a/hw/riscv/meson.build +++ b/hw/riscv/meson.build @@ -10,5 +10,6 @@ riscv_ss.add(when: 'CONFIG_SIFIVE_U', if_true: files('sifive_u.c')) riscv_ss.add(when: 'CONFIG_SPIKE', if_true: files('spike.c')) riscv_ss.add(when: 'CONFIG_MICROCHIP_PFSOC', if_true: files('microchip_pfsoc.c')) riscv_ss.add(when: 'CONFIG_ACPI', if_true: files('virt-acpi-build.c')) +riscv_ss.add(when: 'CONFIG_RISCV_IOMMU', if_true: files('riscv-iommu.c', 'riscv-iommu-pci.c')) hw_arch += {'riscv': riscv_ss} diff --git a/hw/riscv/riscv-iommu-bits.h b/hw/riscv/riscv-iommu-bits.h new file mode 100644 index 0000000000..6359ae0353 --- /dev/null +++ b/hw/riscv/riscv-iommu-bits.h @@ -0,0 +1,421 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright © 2022-2023 Rivos Inc. + * Copyright © 2023 FORTH-ICS/CARV + * Copyright © 2023 RISC-V IOMMU Task Group + * + * RISC-V IOMMU - Register Layout and Data Structures. + * + * Based on the IOMMU spec version 1.0, 3/2023 + * https://github.com/riscv-non-isa/riscv-iommu + */ + +#ifndef HW_RISCV_IOMMU_BITS_H +#define HW_RISCV_IOMMU_BITS_H + +#define RISCV_IOMMU_SPEC_DOT_VER 0x010 + +#ifndef GENMASK_ULL +#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l)) +#endif + +/* + * struct riscv_iommu_fq_record - Fault/Event Queue Record + * See section 3.2 for more info. + */ +struct riscv_iommu_fq_record { + uint64_t hdr; + uint64_t _reserved; + uint64_t iotval; + uint64_t iotval2; +}; +/* Header fields */ +#define RISCV_IOMMU_FQ_HDR_CAUSE GENMASK_ULL(11, 0) +#define RISCV_IOMMU_FQ_HDR_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_FQ_HDR_PV BIT_ULL(32) +#define RISCV_IOMMU_FQ_HDR_TTYPE GENMASK_ULL(39, 34) +#define RISCV_IOMMU_FQ_HDR_DID GENMASK_ULL(63, 40) + +/* + * struct riscv_iommu_pq_record - PCIe Page Request record + * For more infos on the PCIe Page Request queue see chapter 3.3. 
+ */ +struct riscv_iommu_pq_record { + uint64_t hdr; + uint64_t payload; +}; +/* Header fields */ +#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32) +#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) +#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) +#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) +/* Payload fields */ +#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) + +/* Common field positions */ +#define RISCV_IOMMU_PPN_FIELD GENMASK_ULL(53, 10) +#define RISCV_IOMMU_QUEUE_LOGSZ_FIELD GENMASK_ULL(4, 0) +#define RISCV_IOMMU_QUEUE_INDEX_FIELD GENMASK_ULL(31, 0) +#define RISCV_IOMMU_QUEUE_ENABLE BIT(0) +#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1) +#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8) +#define RISCV_IOMMU_QUEUE_OVERFLOW BIT(9) +#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16) +#define RISCV_IOMMU_QUEUE_BUSY BIT(17) +#define RISCV_IOMMU_ATP_PPN_FIELD GENMASK_ULL(43, 0) +#define RISCV_IOMMU_ATP_MODE_FIELD GENMASK_ULL(63, 60) + +/* 5.3 IOMMU Capabilities (64bits) */ +#define RISCV_IOMMU_REG_CAP 0x0000 +#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0) +#define RISCV_IOMMU_CAP_SV32 BIT_ULL(8) +#define RISCV_IOMMU_CAP_SV39 BIT_ULL(9) +#define RISCV_IOMMU_CAP_SV48 BIT_ULL(10) +#define RISCV_IOMMU_CAP_SV57 BIT_ULL(11) +#define RISCV_IOMMU_CAP_SV32X4 BIT_ULL(16) +#define RISCV_IOMMU_CAP_SV39X4 BIT_ULL(17) +#define RISCV_IOMMU_CAP_SV48X4 BIT_ULL(18) +#define RISCV_IOMMU_CAP_SV57X4 BIT_ULL(19) +#define RISCV_IOMMU_CAP_MSI_FLAT BIT_ULL(22) +#define RISCV_IOMMU_CAP_MSI_MRIF BIT_ULL(23) +#define RISCV_IOMMU_CAP_ATS BIT_ULL(25) +#define RISCV_IOMMU_CAP_T2GPA BIT_ULL(26) +#define RISCV_IOMMU_CAP_IGS GENMASK_ULL(29, 28) +#define RISCV_IOMMU_CAP_DBG BIT_ULL(31) +#define RISCV_IOMMU_CAP_PAS GENMASK_ULL(37, 32) +#define RISCV_IOMMU_CAP_PD8 BIT_ULL(38) +#define RISCV_IOMMU_CAP_PD17 BIT_ULL(39) +#define RISCV_IOMMU_CAP_PD20 BIT_ULL(40) + +/* 5.4 Features control register (32bits) */ +#define RISCV_IOMMU_REG_FCTL 0x0008 +#define RISCV_IOMMU_FCTL_BE BIT(0) +#define RISCV_IOMMU_FCTL_WSI BIT(1) +#define RISCV_IOMMU_FCTL_GXL BIT(2) + +/* 5.5 Device-directory-table pointer (64bits) */ +#define RISCV_IOMMU_REG_DDTP 0x0010 +#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0) +#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4) +#define RISCV_IOMMU_DDTP_PPN RISCV_IOMMU_PPN_FIELD + +enum riscv_iommu_ddtp_modes { + RISCV_IOMMU_DDTP_MODE_OFF = 0, + RISCV_IOMMU_DDTP_MODE_BARE = 1, + RISCV_IOMMU_DDTP_MODE_1LVL = 2, + RISCV_IOMMU_DDTP_MODE_2LVL = 3, + RISCV_IOMMU_DDTP_MODE_3LVL = 4, + RISCV_IOMMU_DDTP_MODE_MAX = 4 +}; + +/* 5.6 Command Queue Base (64bits) */ +#define RISCV_IOMMU_REG_CQB 0x0018 +#define RISCV_IOMMU_CQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD +#define RISCV_IOMMU_CQB_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.7 Command Queue head (32bits) */ +#define RISCV_IOMMU_REG_CQH 0x0020 + +/* 5.8 Command Queue tail (32bits) */ +#define RISCV_IOMMU_REG_CQT 0x0024 + +/* 5.9 Fault Queue Base (64bits) */ +#define RISCV_IOMMU_REG_FQB 0x0028 +#define RISCV_IOMMU_FQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD +#define RISCV_IOMMU_FQB_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.10 Fault Queue Head (32bits) */ +#define RISCV_IOMMU_REG_FQH 0x0030 + +/* 5.11 Fault Queue tail (32bits) */ +#define RISCV_IOMMU_REG_FQT 0x0034 + +/* 5.12 Page Request Queue base (64bits) */ +#define RISCV_IOMMU_REG_PQB 0x0038 +#define RISCV_IOMMU_PQB_LOG2SZ RISCV_IOMMU_QUEUE_LOGSZ_FIELD +#define RISCV_IOMMU_PQB_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.13 Page Request Queue head (32bits) */ +#define RISCV_IOMMU_REG_PQH 0x0040 + +/* 5.14 Page Request Queue 
tail (32bits) */ +#define RISCV_IOMMU_REG_PQT 0x0044 + +/* 5.15 Command Queue CSR (32bits) */ +#define RISCV_IOMMU_REG_CQCSR 0x0048 +#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_CQCSR_CQMF RISCV_IOMMU_QUEUE_MEM_FAULT +#define RISCV_IOMMU_CQCSR_CMD_TO BIT(9) +#define RISCV_IOMMU_CQCSR_CMD_ILL BIT(10) +#define RISCV_IOMMU_CQCSR_FENCE_W_IP BIT(11) +#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +/* 5.16 Fault Queue CSR (32bits) */ +#define RISCV_IOMMU_REG_FQCSR 0x004C +#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_FQCSR_FQMF RISCV_IOMMU_QUEUE_MEM_FAULT +#define RISCV_IOMMU_FQCSR_FQOF RISCV_IOMMU_QUEUE_OVERFLOW +#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +/* 5.17 Page Request Queue CSR (32bits) */ +#define RISCV_IOMMU_REG_PQCSR 0x0050 +#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_PQCSR_PQMF RISCV_IOMMU_QUEUE_MEM_FAULT +#define RISCV_IOMMU_PQCSR_PQOF RISCV_IOMMU_QUEUE_OVERFLOW +#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +/* 5.18 Interrupt Pending Status (32bits) */ +#define RISCV_IOMMU_REG_IPSR 0x0054 +#define RISCV_IOMMU_IPSR_CIP BIT(0) +#define RISCV_IOMMU_IPSR_FIP BIT(1) +#define RISCV_IOMMU_IPSR_PIP BIT(3) + +enum { + RISCV_IOMMU_INTR_CQ, + RISCV_IOMMU_INTR_FQ, + RISCV_IOMMU_INTR_PM, + RISCV_IOMMU_INTR_PQ, + RISCV_IOMMU_INTR_COUNT +}; + +/* 5.24 Translation request IOVA (64bits) */ +#define RISCV_IOMMU_REG_TR_REQ_IOVA 0x0258 + +/* 5.25 Translation request control (64bits) */ +#define RISCV_IOMMU_REG_TR_REQ_CTL 0x0260 +#define RISCV_IOMMU_TR_REQ_CTL_GO_BUSY BIT_ULL(0) +#define RISCV_IOMMU_TR_REQ_CTL_NW BIT_ULL(3) +#define RISCV_IOMMU_TR_REQ_CTL_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_TR_REQ_CTL_DID GENMASK_ULL(63, 40) + +/* 5.26 Translation request response (64bits) */ +#define RISCV_IOMMU_REG_TR_RESPONSE 0x0268 +#define RISCV_IOMMU_TR_RESPONSE_FAULT BIT_ULL(0) +#define RISCV_IOMMU_TR_RESPONSE_S BIT_ULL(9) +#define RISCV_IOMMU_TR_RESPONSE_PPN RISCV_IOMMU_PPN_FIELD + +/* 5.27 Interrupt cause to vector (64bits) */ +#define RISCV_IOMMU_REG_ICVEC 0x02F8 +#define RISCV_IOMMU_ICVEC_CIV GENMASK_ULL(3, 0) +#define RISCV_IOMMU_ICVEC_FIV GENMASK_ULL(7, 4) +#define RISCV_IOMMU_ICVEC_PMIV GENMASK_ULL(11, 8) +#define RISCV_IOMMU_ICVEC_PIV GENMASK_ULL(15, 12) + +/* 5.28 MSI Configuration table (32 * 64bits) */ +#define RISCV_IOMMU_REG_MSI_CONFIG 0x0300 + +#define RISCV_IOMMU_REG_SIZE 0x1000 + +#define RISCV_IOMMU_DDTE_VALID BIT_ULL(0) +#define RISCV_IOMMU_DDTE_PPN RISCV_IOMMU_PPN_FIELD + +/* Struct riscv_iommu_dc - Device Context - section 2.1 */ +struct riscv_iommu_dc { + uint64_t tc; + uint64_t iohgatp; + uint64_t ta; + uint64_t fsc; + uint64_t msiptp; + uint64_t msi_addr_mask; + uint64_t msi_addr_pattern; + uint64_t _reserved; +}; + +/* Translation control fields */ +#define RISCV_IOMMU_DC_TC_V BIT_ULL(0) +#define RISCV_IOMMU_DC_TC_EN_ATS BIT_ULL(1) +#define RISCV_IOMMU_DC_TC_EN_PRI BIT_ULL(2) +#define RISCV_IOMMU_DC_TC_T2GPA BIT_ULL(3) +#define RISCV_IOMMU_DC_TC_DTF BIT_ULL(4) +#define RISCV_IOMMU_DC_TC_PDTV BIT_ULL(5) +#define RISCV_IOMMU_DC_TC_PRPR BIT_ULL(6) +#define RISCV_IOMMU_DC_TC_GADE BIT_ULL(7) +#define 
RISCV_IOMMU_DC_TC_SADE BIT_ULL(8) +#define RISCV_IOMMU_DC_TC_DPE BIT_ULL(9) +#define RISCV_IOMMU_DC_TC_SBE BIT_ULL(10) +#define RISCV_IOMMU_DC_TC_SXL BIT_ULL(11) + +/* Second-stage (aka G-stage) context fields */ +#define RISCV_IOMMU_DC_IOHGATP_PPN RISCV_IOMMU_ATP_PPN_FIELD +#define RISCV_IOMMU_DC_IOHGATP_GSCID GENMASK_ULL(59, 44) +#define RISCV_IOMMU_DC_IOHGATP_MODE RISCV_IOMMU_ATP_MODE_FIELD + +enum riscv_iommu_dc_iohgatp_modes { + RISCV_IOMMU_DC_IOHGATP_MODE_BARE = 0, + RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4 = 8, + RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4 = 8, + RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4 = 9, + RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4 = 10 +}; + +/* Translation attributes fields */ +#define RISCV_IOMMU_DC_TA_PSCID GENMASK_ULL(31, 12) + +/* First-stage context fields */ +#define RISCV_IOMMU_DC_FSC_PPN RISCV_IOMMU_ATP_PPN_FIELD +#define RISCV_IOMMU_DC_FSC_MODE RISCV_IOMMU_ATP_MODE_FIELD + +/* Generic I/O MMU command structure - check section 3.1 */ +struct riscv_iommu_command { + uint64_t dword0; + uint64_t dword1; +}; + +#define RISCV_IOMMU_CMD_OPCODE GENMASK_ULL(6, 0) +#define RISCV_IOMMU_CMD_FUNC GENMASK_ULL(9, 7) + +#define RISCV_IOMMU_CMD_IOTINVAL_OPCODE 1 +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA 0 +#define RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA 1 +#define RISCV_IOMMU_CMD_IOTINVAL_AV BIT_ULL(10) +#define RISCV_IOMMU_CMD_IOTINVAL_PSCID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_CMD_IOTINVAL_PSCV BIT_ULL(32) +#define RISCV_IOMMU_CMD_IOTINVAL_GV BIT_ULL(33) +#define RISCV_IOMMU_CMD_IOTINVAL_GSCID GENMASK_ULL(59, 44) + +#define RISCV_IOMMU_CMD_IOFENCE_OPCODE 2 +#define RISCV_IOMMU_CMD_IOFENCE_FUNC_C 0 +#define RISCV_IOMMU_CMD_IOFENCE_AV BIT_ULL(10) +#define RISCV_IOMMU_CMD_IOFENCE_DATA GENMASK_ULL(63, 32) + +#define RISCV_IOMMU_CMD_IODIR_OPCODE 3 +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT 0 +#define RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT 1 +#define RISCV_IOMMU_CMD_IODIR_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_CMD_IODIR_DV BIT_ULL(33) +#define RISCV_IOMMU_CMD_IODIR_DID GENMASK_ULL(63, 40) + +/* 3.1.4 I/O MMU PCIe ATS */ +#define RISCV_IOMMU_CMD_ATS_OPCODE 4 +#define RISCV_IOMMU_CMD_ATS_FUNC_INVAL 0 +#define RISCV_IOMMU_CMD_ATS_FUNC_PRGR 1 +#define RISCV_IOMMU_CMD_ATS_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_CMD_ATS_PV BIT_ULL(32) +#define RISCV_IOMMU_CMD_ATS_DSV BIT_ULL(33) +#define RISCV_IOMMU_CMD_ATS_RID GENMASK_ULL(55, 40) +#define RISCV_IOMMU_CMD_ATS_DSEG GENMASK_ULL(63, 56) +/* dword1 is the ATS payload, two different payload types for INVAL and PRGR */ + +/* ATS.PRGR payload */ +#define RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE GENMASK_ULL(47, 44) + +enum riscv_iommu_dc_fsc_atp_modes { + RISCV_IOMMU_DC_FSC_MODE_BARE = 0, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV32 = 8, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 = 8, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48 = 9, + RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57 = 10, + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8 = 1, + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17 = 2, + RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20 = 3 +}; + +enum riscv_iommu_fq_causes { + RISCV_IOMMU_FQ_CAUSE_INST_FAULT = 1, + RISCV_IOMMU_FQ_CAUSE_RD_ADDR_MISALIGNED = 4, + RISCV_IOMMU_FQ_CAUSE_RD_FAULT = 5, + RISCV_IOMMU_FQ_CAUSE_WR_ADDR_MISALIGNED = 6, + RISCV_IOMMU_FQ_CAUSE_WR_FAULT = 7, + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_S = 12, + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S = 13, + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S = 15, + RISCV_IOMMU_FQ_CAUSE_INST_FAULT_VS = 20, + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS = 21, + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS = 23, + RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED = 256, + RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT = 257, + 
RISCV_IOMMU_FQ_CAUSE_DDT_INVALID = 258, + RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED = 259, + RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED = 260, + RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT = 261, + RISCV_IOMMU_FQ_CAUSE_MSI_INVALID = 262, + RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED = 263, + RISCV_IOMMU_FQ_CAUSE_MRIF_FAULT = 264, + RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT = 265, + RISCV_IOMMU_FQ_CAUSE_PDT_INVALID = 266, + RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED = 267, + RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED = 268, + RISCV_IOMMU_FQ_CAUSE_PDT_CORRUPTED = 269, + RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED = 270, + RISCV_IOMMU_FQ_CAUSE_MRIF_CORRUIPTED = 271, + RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR = 272, + RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT = 273, + RISCV_IOMMU_FQ_CAUSE_PT_CORRUPTED = 274 +}; + +/* MSI page table pointer */ +#define RISCV_IOMMU_DC_MSIPTP_PPN RISCV_IOMMU_ATP_PPN_FIELD +#define RISCV_IOMMU_DC_MSIPTP_MODE RISCV_IOMMU_ATP_MODE_FIELD +#define RISCV_IOMMU_DC_MSIPTP_MODE_OFF 0 +#define RISCV_IOMMU_DC_MSIPTP_MODE_FLAT 1 + +/* Translation attributes fields */ +#define RISCV_IOMMU_PC_TA_V BIT_ULL(0) +#define RISCV_IOMMU_PC_TA_RESERVED GENMASK_ULL(63, 32) + +/* First stage context fields */ +#define RISCV_IOMMU_PC_FSC_PPN GENMASK_ULL(43, 0) +#define RISCV_IOMMU_PC_FSC_RESERVED GENMASK_ULL(59, 44) + +enum riscv_iommu_fq_ttypes { + RISCV_IOMMU_FQ_TTYPE_NONE = 0, + RISCV_IOMMU_FQ_TTYPE_UADDR_INST_FETCH = 1, + RISCV_IOMMU_FQ_TTYPE_UADDR_RD = 2, + RISCV_IOMMU_FQ_TTYPE_UADDR_WR = 3, + RISCV_IOMMU_FQ_TTYPE_TADDR_INST_FETCH = 5, + RISCV_IOMMU_FQ_TTYPE_TADDR_RD = 6, + RISCV_IOMMU_FQ_TTYPE_TADDR_WR = 7, + RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ = 8, + RISCV_IOMMU_FW_TTYPE_PCIE_MSG_REQ = 9, +}; + +/* Header fields */ +#define RISCV_IOMMU_PREQ_HDR_PID GENMASK_ULL(31, 12) +#define RISCV_IOMMU_PREQ_HDR_PV BIT_ULL(32) +#define RISCV_IOMMU_PREQ_HDR_PRIV BIT_ULL(33) +#define RISCV_IOMMU_PREQ_HDR_EXEC BIT_ULL(34) +#define RISCV_IOMMU_PREQ_HDR_DID GENMASK_ULL(63, 40) + +/* Payload fields */ +#define RISCV_IOMMU_PREQ_PAYLOAD_R BIT_ULL(0) +#define RISCV_IOMMU_PREQ_PAYLOAD_W BIT_ULL(1) +#define RISCV_IOMMU_PREQ_PAYLOAD_L BIT_ULL(2) +#define RISCV_IOMMU_PREQ_PAYLOAD_M GENMASK_ULL(2, 0) +#define RISCV_IOMMU_PREQ_PRG_INDEX GENMASK_ULL(11, 3) +#define RISCV_IOMMU_PREQ_UADDR GENMASK_ULL(63, 12) + + +/* + * struct riscv_iommu_msi_pte - MSI Page Table Entry + */ +struct riscv_iommu_msi_pte { + uint64_t pte; + uint64_t mrif_info; +}; + +/* Fields on pte */ +#define RISCV_IOMMU_MSI_PTE_V BIT_ULL(0) +#define RISCV_IOMMU_MSI_PTE_M GENMASK_ULL(2, 1) + +#define RISCV_IOMMU_MSI_PTE_M_MRIF 1 +#define RISCV_IOMMU_MSI_PTE_M_BASIC 3 + +/* When M == 1 (MRIF mode) */ +#define RISCV_IOMMU_MSI_PTE_MRIF_ADDR GENMASK_ULL(53, 7) +/* When M == 3 (basic mode) */ +#define RISCV_IOMMU_MSI_PTE_PPN RISCV_IOMMU_PPN_FIELD +#define RISCV_IOMMU_MSI_PTE_C BIT_ULL(63) + +/* Fields on mrif_info */ +#define RISCV_IOMMU_MSI_MRIF_NID GENMASK_ULL(9, 0) +#define RISCV_IOMMU_MSI_MRIF_NPPN RISCV_IOMMU_PPN_FIELD +#define RISCV_IOMMU_MSI_MRIF_NID_MSB BIT_ULL(60) + +#endif /* _RISCV_IOMMU_BITS_H_ */ diff --git a/hw/riscv/riscv-iommu-pci.c b/hw/riscv/riscv-iommu-pci.c new file mode 100644 index 0000000000..a42242532d --- /dev/null +++ b/hw/riscv/riscv-iommu-pci.c @@ -0,0 +1,202 @@ +/* + * QEMU emulation of an RISC-V IOMMU + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/pci/pci_bus.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/riscv_hart.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "qemu/host-utils.h" +#include "qom/object.h" + +#include "cpu_bits.h" +#include "riscv-iommu.h" +#include "riscv-iommu-bits.h" + +/* RISC-V IOMMU PCI Device Emulation */ +#define RISCV_PCI_CLASS_SYSTEM_IOMMU 0x0806 + +/* + * 4 MSIx vectors for ICVEC, one for MRIF. The spec mentions in + * the "Placement and data flow" section that: + * + * "The interfaces related to recording an incoming MSI in a memory-resident + * interrupt file (MRIF) are implementation-specific. The partitioning of + * responsibility between the IOMMU and the IO bridge for recording the + * incoming MSI in an MRIF and generating the associated notice MSI are + * implementation-specific." + * + * We're making a design decision to create the MSIx for MRIF in the + * IOMMU MSIx emulation. + */ +#define RISCV_IOMMU_PCI_MSIX_VECTORS 5 + +/* + * 4 vectors that can be used by civ, fiv, pmiv and piv. Number of + * vectors is represented by 2^N, where N = number of writable bits + * in each cause. For 4 vectors we'll write 0b11 (3) in each reg. + */ +#define RISCV_IOMMU_PCI_ICVEC_VECTORS 0x3333 + +typedef struct RISCVIOMMUStatePci { + PCIDevice pci; /* Parent PCIe device state */ + uint16_t vendor_id; + uint16_t device_id; + uint8_t revision; + MemoryRegion bar0; /* PCI BAR (including MSI-x config) */ + RISCVIOMMUState iommu; /* common IOMMU state */ +} RISCVIOMMUStatePci; + +/* interrupt delivery callback */ +static void riscv_iommu_pci_notify(RISCVIOMMUState *iommu, unsigned vector) +{ + RISCVIOMMUStatePci *s = container_of(iommu, RISCVIOMMUStatePci, iommu); + + if (msix_enabled(&(s->pci))) { + msix_notify(&(s->pci), vector); + } +} + +static void riscv_iommu_pci_realize(PCIDevice *dev, Error **errp) +{ + RISCVIOMMUStatePci *s = DO_UPCAST(RISCVIOMMUStatePci, pci, dev); + RISCVIOMMUState *iommu = &s->iommu; + uint8_t *pci_conf = dev->config; + Error *err = NULL; + + pci_set_word(pci_conf + PCI_VENDOR_ID, s->vendor_id); + pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, s->vendor_id); + pci_set_word(pci_conf + PCI_DEVICE_ID, s->device_id); + pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, s->device_id); + pci_set_byte(pci_conf + PCI_REVISION_ID, s->revision); + + /* Set device id for trace / debug */ + DEVICE(iommu)->id = g_strdup_printf("%02x:%02x.%01x", + pci_dev_bus_num(dev), PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); + qdev_realize(DEVICE(iommu), NULL, errp); + + memory_region_init(&s->bar0, OBJECT(s), "riscv-iommu-bar0", + QEMU_ALIGN_UP(memory_region_size(&iommu->regs_mr), TARGET_PAGE_SIZE)); + memory_region_add_subregion(&s->bar0, 0, &iommu->regs_mr); + + pcie_endpoint_cap_init(dev, 0); + + pci_register_bar(dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_64, &s->bar0); + + int ret = msix_init(dev, RISCV_IOMMU_PCI_MSIX_VECTORS, + &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG, + &s->bar0, 0, RISCV_IOMMU_REG_MSI_CONFIG + 256, 0, &err); + + if (ret == -ENOTSUP) { + /* + * MSI-x is 
not supported by the platform. + * Driver should use timer/polling based notification handlers. + */ + warn_report_err(err); + } else if (ret < 0) { + error_propagate(errp, err); + return; + } else { + /* Mark all ICVEC MSIx vectors as used */ + for (int i = 0; i < RISCV_IOMMU_PCI_MSIX_VECTORS; i++) { + msix_vector_use(dev, i); + } + + iommu->notify = riscv_iommu_pci_notify; + } + + PCIBus *bus = pci_device_root_bus(dev); + if (!bus) { + error_setg(errp, "can't find PCIe root port for %02x:%02x.%x", + pci_bus_num(pci_get_bus(dev)), PCI_SLOT(dev->devfn), + PCI_FUNC(dev->devfn)); + return; + } + + riscv_iommu_pci_setup_iommu(iommu, bus, errp); +} + +static void riscv_iommu_pci_exit(PCIDevice *pci_dev) +{ + pci_setup_iommu(pci_device_root_bus(pci_dev), NULL, NULL); +} + +static const VMStateDescription riscv_iommu_vmstate = { + .name = "riscv-iommu", + .unmigratable = 1 +}; + +static void riscv_iommu_pci_init(Object *obj) +{ + RISCVIOMMUStatePci *s = RISCV_IOMMU_PCI(obj); + RISCVIOMMUState *iommu = &s->iommu; + + object_initialize_child(obj, "iommu", iommu, TYPE_RISCV_IOMMU); + qdev_alias_all_properties(DEVICE(iommu), obj); + + iommu->icvec_avail_vectors = RISCV_IOMMU_PCI_ICVEC_VECTORS; +} + +static Property riscv_iommu_pci_properties[] = { + DEFINE_PROP_UINT16("vendor-id", RISCVIOMMUStatePci, vendor_id, + PCI_VENDOR_ID_REDHAT), + DEFINE_PROP_UINT16("device-id", RISCVIOMMUStatePci, device_id, + PCI_DEVICE_ID_REDHAT_RISCV_IOMMU), + DEFINE_PROP_UINT8("revision", RISCVIOMMUStatePci, revision, 0x01), + DEFINE_PROP_END_OF_LIST(), +}; + +static void riscv_iommu_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->realize = riscv_iommu_pci_realize; + k->exit = riscv_iommu_pci_exit; + k->class_id = RISCV_PCI_CLASS_SYSTEM_IOMMU; + dc->desc = "RISCV-IOMMU DMA Remapping device"; + dc->vmsd = &riscv_iommu_vmstate; + dc->hotpluggable = false; + dc->user_creatable = true; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + device_class_set_props(dc, riscv_iommu_pci_properties); +} + +static const TypeInfo riscv_iommu_pci = { + .name = TYPE_RISCV_IOMMU_PCI, + .parent = TYPE_PCI_DEVICE, + .class_init = riscv_iommu_pci_class_init, + .instance_init = riscv_iommu_pci_init, + .instance_size = sizeof(RISCVIOMMUStatePci), + .interfaces = (InterfaceInfo[]) { + { INTERFACE_PCIE_DEVICE }, + { }, + }, +}; + +static void riscv_iommu_register_pci_types(void) +{ + type_register_static(&riscv_iommu_pci); +} + +type_init(riscv_iommu_register_pci_types); diff --git a/hw/riscv/riscv-iommu.c b/hw/riscv/riscv-iommu.c new file mode 100644 index 0000000000..feb650549a --- /dev/null +++ b/hw/riscv/riscv-iommu.c @@ -0,0 +1,2399 @@ +/* + * QEMU emulation of an RISC-V IOMMU + * + * Copyright (C) 2021-2023, Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "qom/object.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci_device.h" +#include "hw/qdev-properties.h" +#include "hw/riscv/riscv_hart.h" +#include "migration/vmstate.h" +#include "qapi/error.h" +#include "qemu/timer.h" + +#include "cpu_bits.h" +#include "riscv-iommu.h" +#include "riscv-iommu-bits.h" +#include "trace.h" + +#define LIMIT_CACHE_CTX (1U << 7) +#define LIMIT_CACHE_IOT (1U << 20) + +/* Physical page number coversions */ +#define PPN_PHYS(ppn) ((ppn) << TARGET_PAGE_BITS) +#define PPN_DOWN(phy) ((phy) >> TARGET_PAGE_BITS) + +typedef struct RISCVIOMMUContext RISCVIOMMUContext; +typedef struct RISCVIOMMUEntry RISCVIOMMUEntry; + +/* Device assigned I/O address space */ +struct RISCVIOMMUSpace { + IOMMUMemoryRegion iova_mr; /* IOVA memory region for attached device */ + AddressSpace iova_as; /* IOVA address space for attached device */ + RISCVIOMMUState *iommu; /* Managing IOMMU device state */ + uint32_t devid; /* Requester identifier, AKA device_id */ + bool notifier; /* IOMMU unmap notifier enabled */ + QLIST_ENTRY(RISCVIOMMUSpace) list; +}; + +/* Device translation context state. */ +struct RISCVIOMMUContext { + uint64_t devid:24; /* Requester Id, AKA device_id */ + uint64_t process_id:20; /* Process ID. PASID for PCIe */ + uint64_t tc; /* Translation Control */ + uint64_t ta; /* Translation Attributes */ + uint64_t satp; /* S-Stage address translation and protection */ + uint64_t gatp; /* G-Stage address translation and protection */ + uint64_t msi_addr_mask; /* MSI filtering - address mask */ + uint64_t msi_addr_pattern; /* MSI filtering - address pattern */ + uint64_t msiptp; /* MSI redirection page table pointer */ +}; + +/* Address translation cache entry */ +struct RISCVIOMMUEntry { + uint64_t iova:44; /* IOVA Page Number */ + uint64_t pscid:20; /* Process Soft-Context identifier */ + uint64_t phys:44; /* Physical Page Number */ + uint64_t gscid:16; /* Guest Soft-Context identifier */ + uint64_t perm:2; /* IOMMU_RW flags */ +}; + +/* IOMMU index for transactions without process_id specified. 
*/ +#define RISCV_IOMMU_NOPROCID 0 + +static uint8_t riscv_iommu_get_icvec_vector(uint32_t icvec, uint32_t vec_type) +{ + switch (vec_type) { + case RISCV_IOMMU_INTR_CQ: + return icvec & RISCV_IOMMU_ICVEC_CIV; + case RISCV_IOMMU_INTR_FQ: + return (icvec & RISCV_IOMMU_ICVEC_FIV) >> 4; + case RISCV_IOMMU_INTR_PM: + return (icvec & RISCV_IOMMU_ICVEC_PMIV) >> 8; + case RISCV_IOMMU_INTR_PQ: + return (icvec & RISCV_IOMMU_ICVEC_PIV) >> 12; + default: + g_assert_not_reached(); + } +} + +static void riscv_iommu_notify(RISCVIOMMUState *s, int vec_type) +{ + const uint32_t fctl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FCTL); + uint32_t ipsr, icvec, vector; + + if (fctl & RISCV_IOMMU_FCTL_WSI || !s->notify) { + return; + } + + icvec = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_ICVEC); + ipsr = riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, (1 << vec_type), 0); + + if (!(ipsr & (1 << vec_type))) { + vector = riscv_iommu_get_icvec_vector(icvec, vec_type); + s->notify(s, vector); + trace_riscv_iommu_notify_int_vector(vec_type, vector); + } +} + +static void riscv_iommu_fault(RISCVIOMMUState *s, + struct riscv_iommu_fq_record *ev) +{ + uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); + uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQH) & s->fq_mask; + uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQT) & s->fq_mask; + uint32_t next = (tail + 1) & s->fq_mask; + uint32_t devid = get_field(ev->hdr, RISCV_IOMMU_FQ_HDR_DID); + + trace_riscv_iommu_flt(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), ev->hdr, ev->iotval); + + if (!(ctrl & RISCV_IOMMU_FQCSR_FQON) || + !!(ctrl & (RISCV_IOMMU_FQCSR_FQOF | RISCV_IOMMU_FQCSR_FQMF))) { + return; + } + + if (head == next) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, + RISCV_IOMMU_FQCSR_FQOF, 0); + } else { + dma_addr_t addr = s->fq_addr + tail * sizeof(*ev); + if (dma_memory_write(s->target_as, addr, ev, sizeof(*ev), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, + RISCV_IOMMU_FQCSR_FQMF, 0); + } else { + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_FQT, next); + } + } + + if (ctrl & RISCV_IOMMU_FQCSR_FIE) { + riscv_iommu_notify(s, RISCV_IOMMU_INTR_FQ); + } +} + +static void riscv_iommu_pri(RISCVIOMMUState *s, + struct riscv_iommu_pq_record *pr) +{ + uint32_t ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); + uint32_t head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQH) & s->pq_mask; + uint32_t tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQT) & s->pq_mask; + uint32_t next = (tail + 1) & s->pq_mask; + uint32_t devid = get_field(pr->hdr, RISCV_IOMMU_PREQ_HDR_DID); + + trace_riscv_iommu_pri(s->parent_obj.id, PCI_BUS_NUM(devid), PCI_SLOT(devid), + PCI_FUNC(devid), pr->payload); + + if (!(ctrl & RISCV_IOMMU_PQCSR_PQON) || + !!(ctrl & (RISCV_IOMMU_PQCSR_PQOF | RISCV_IOMMU_PQCSR_PQMF))) { + return; + } + + if (head == next) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, + RISCV_IOMMU_PQCSR_PQOF, 0); + } else { + dma_addr_t addr = s->pq_addr + tail * sizeof(*pr); + if (dma_memory_write(s->target_as, addr, pr, sizeof(*pr), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, + RISCV_IOMMU_PQCSR_PQMF, 0); + } else { + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_PQT, next); + } + } + + if (ctrl & RISCV_IOMMU_PQCSR_PIE) { + riscv_iommu_notify(s, RISCV_IOMMU_INTR_PQ); + } +} + +/* Portable implementation of pext_u64, bit-mask extraction. 
*/ +static uint64_t _pext_u64(uint64_t val, uint64_t ext) +{ + uint64_t ret = 0; + uint64_t rot = 1; + + while (ext) { + if (ext & 1) { + if (val & 1) { + ret |= rot; + } + rot <<= 1; + } + val >>= 1; + ext >>= 1; + } + + return ret; +} + +/* Check if GPA matches MSI/MRIF pattern. */ +static bool riscv_iommu_msi_check(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + dma_addr_t gpa) +{ + if (!s->enable_msi) { + return false; + } + + if (get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE) != + RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { + return false; /* Invalid MSI/MRIF mode */ + } + + if ((PPN_DOWN(gpa) ^ ctx->msi_addr_pattern) & ~ctx->msi_addr_mask) { + return false; /* GPA not in MSI range defined by AIA IMSIC rules. */ + } + + return true; +} + +/* + * RISCV IOMMU Address Translation Lookup - Page Table Walk + * + * Note: Code is based on get_physical_address() from target/riscv/cpu_helper.c + * Both implementation can be merged into single helper function in future. + * Keeping them separate for now, as error reporting and flow specifics are + * sufficiently different for separate implementation. + * + * @s : IOMMU Device State + * @ctx : Translation context for device id and process address space id. + * @iotlb : translation data: physical address and access mode. + * @return : success or fault cause code. + */ +static int riscv_iommu_spa_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + IOMMUTLBEntry *iotlb) +{ + dma_addr_t addr, base; + uint64_t satp, gatp, pte; + bool en_s, en_g; + struct { + unsigned char step; + unsigned char levels; + unsigned char ptidxbits; + unsigned char ptesize; + } sc[2]; + /* Translation stage phase */ + enum { + S_STAGE = 0, + G_STAGE = 1, + } pass; + MemTxResult ret; + + satp = get_field(ctx->satp, RISCV_IOMMU_ATP_MODE_FIELD); + gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); + + en_s = satp != RISCV_IOMMU_DC_FSC_MODE_BARE; + en_g = gatp != RISCV_IOMMU_DC_IOHGATP_MODE_BARE; + + /* + * Early check for MSI address match when IOVA == GPA. + * Note that the (!en_s) condition means that the MSI + * page table may only be used when guest pages are + * mapped using the g-stage page table, whether single- + * or two-stage paging is enabled. It's unavoidable though, + * because the spec mandates that we do a first-stage + * translation before we check the MSI page table, which + * means we can't do an early MSI check unless we have + * strictly !en_s. + */ + if (!en_s && (iotlb->perm & IOMMU_WO) && + riscv_iommu_msi_check(s, ctx, iotlb->iova)) { + iotlb->target_as = &s->trap_as; + iotlb->translated_addr = iotlb->iova; + iotlb->addr_mask = ~TARGET_PAGE_MASK; + return 0; + } + + /* Exit early for pass-through mode. */ + if (!(en_s || en_g)) { + iotlb->translated_addr = iotlb->iova; + iotlb->addr_mask = ~TARGET_PAGE_MASK; + /* Allow R/W in pass-through mode */ + iotlb->perm = IOMMU_RW; + return 0; + } + + /* S/G translation parameters. */ + for (pass = 0; pass < 2; pass++) { + uint32_t sv_mode; + + sc[pass].step = 0; + if (pass ? (s->fctl & RISCV_IOMMU_FCTL_GXL) : + (ctx->tc & RISCV_IOMMU_DC_TC_SXL)) { + /* 32bit mode for GXL/SXL == 1 */ + switch (pass ? gatp : satp) { + case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: + sc[pass].levels = 0; + sc[pass].ptidxbits = 0; + sc[pass].ptesize = 0; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV32X4: + sv_mode = pass ? 
RISCV_IOMMU_CAP_SV32X4 : RISCV_IOMMU_CAP_SV32; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 2; + sc[pass].ptidxbits = 10; + sc[pass].ptesize = 4; + break; + default: + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + } else { + /* 64bit mode for GXL/SXL == 0 */ + switch (pass ? gatp : satp) { + case RISCV_IOMMU_DC_IOHGATP_MODE_BARE: + sc[pass].levels = 0; + sc[pass].ptidxbits = 0; + sc[pass].ptesize = 0; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV39X4: + sv_mode = pass ? RISCV_IOMMU_CAP_SV39X4 : RISCV_IOMMU_CAP_SV39; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 3; + sc[pass].ptidxbits = 9; + sc[pass].ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV48X4: + sv_mode = pass ? RISCV_IOMMU_CAP_SV48X4 : RISCV_IOMMU_CAP_SV48; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 4; + sc[pass].ptidxbits = 9; + sc[pass].ptesize = 8; + break; + case RISCV_IOMMU_DC_IOHGATP_MODE_SV57X4: + sv_mode = pass ? RISCV_IOMMU_CAP_SV57X4 : RISCV_IOMMU_CAP_SV57; + if (!(s->cap & sv_mode)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + sc[pass].levels = 5; + sc[pass].ptidxbits = 9; + sc[pass].ptesize = 8; + break; + default: + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + } + }; + + /* S/G stages translation tables root pointers */ + gatp = PPN_PHYS(get_field(ctx->gatp, RISCV_IOMMU_ATP_PPN_FIELD)); + satp = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_ATP_PPN_FIELD)); + addr = (en_s && en_g) ? satp : iotlb->iova; + base = en_g ? gatp : satp; + pass = en_g ? G_STAGE : S_STAGE; + + do { + const unsigned widened = (pass && !sc[pass].step) ? 2 : 0; + const unsigned va_bits = widened + sc[pass].ptidxbits; + const unsigned va_skip = TARGET_PAGE_BITS + sc[pass].ptidxbits * + (sc[pass].levels - 1 - sc[pass].step); + const unsigned idx = (addr >> va_skip) & ((1 << va_bits) - 1); + const dma_addr_t pte_addr = base + idx * sc[pass].ptesize; + const bool ade = + ctx->tc & (pass ? RISCV_IOMMU_DC_TC_GADE : RISCV_IOMMU_DC_TC_SADE); + + /* Address range check before first level lookup */ + if (!sc[pass].step) { + const uint64_t va_mask = (1ULL << (va_skip + va_bits)) - 1; + if ((addr & va_mask) != addr) { + return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; + } + } + + /* Read page table entry */ + if (sc[pass].ptesize == 4) { + uint32_t pte32 = 0; + ret = ldl_le_dma(s->target_as, pte_addr, &pte32, + MEMTXATTRS_UNSPECIFIED); + pte = pte32; + } else { + ret = ldq_le_dma(s->target_as, pte_addr, &pte, + MEMTXATTRS_UNSPECIFIED); + } + if (ret != MEMTX_OK) { + return (iotlb->perm & IOMMU_WO) ? 
RISCV_IOMMU_FQ_CAUSE_WR_FAULT + : RISCV_IOMMU_FQ_CAUSE_RD_FAULT; + } + + sc[pass].step++; + hwaddr ppn = pte >> PTE_PPN_SHIFT; + + if (!(pte & PTE_V)) { + break; /* Invalid PTE */ + } else if (!(pte & (PTE_R | PTE_W | PTE_X))) { + base = PPN_PHYS(ppn); /* Inner PTE, continue walking */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) { + break; /* Reserved leaf PTE flags: PTE_W */ + } else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) { + break; /* Reserved leaf PTE flags: PTE_W + PTE_X */ + } else if (ppn & ((1ULL << (va_skip - TARGET_PAGE_BITS)) - 1)) { + break; /* Misaligned PPN */ + } else if ((iotlb->perm & IOMMU_RO) && !(pte & PTE_R)) { + break; /* Read access check failed */ + } else if ((iotlb->perm & IOMMU_WO) && !(pte & PTE_W)) { + break; /* Write access check failed */ + } else if ((iotlb->perm & IOMMU_RO) && !ade && !(pte & PTE_A)) { + break; /* Access bit not set */ + } else if ((iotlb->perm & IOMMU_WO) && !ade && !(pte & PTE_D)) { + break; /* Dirty bit not set */ + } else { + /* Leaf PTE, translation completed. */ + sc[pass].step = sc[pass].levels; + base = PPN_PHYS(ppn) | (addr & ((1ULL << va_skip) - 1)); + /* Update address mask based on smallest translation granularity */ + iotlb->addr_mask &= (1ULL << va_skip) - 1; + /* Continue with S-Stage translation? */ + if (pass && sc[0].step != sc[0].levels) { + pass = S_STAGE; + addr = iotlb->iova; + continue; + } + /* Translation phase completed (GPA or SPA) */ + iotlb->translated_addr = base; + iotlb->perm = (pte & PTE_W) ? ((pte & PTE_R) ? IOMMU_RW : IOMMU_WO) + : IOMMU_RO; + + /* Check MSI GPA address match */ + if (pass == S_STAGE && (iotlb->perm & IOMMU_WO) && + riscv_iommu_msi_check(s, ctx, base)) { + /* Trap MSI writes and return GPA address. */ + iotlb->target_as = &s->trap_as; + iotlb->addr_mask = ~TARGET_PAGE_MASK; + return 0; + } + + /* Continue with G-Stage translation? */ + if (!pass && en_g) { + pass = G_STAGE; + addr = base; + base = gatp; + sc[pass].step = 0; + continue; + } + + return 0; + } + + if (sc[pass].step == sc[pass].levels) { + break; /* Can't find leaf PTE */ + } + + /* Continue with G-Stage translation? */ + if (!pass && en_g) { + pass = G_STAGE; + addr = base; + base = gatp; + sc[pass].step = 0; + } + } while (1); + + return (iotlb->perm & IOMMU_WO) ? + (pass ? RISCV_IOMMU_FQ_CAUSE_WR_FAULT_VS : + RISCV_IOMMU_FQ_CAUSE_WR_FAULT_S) : + (pass ? 
RISCV_IOMMU_FQ_CAUSE_RD_FAULT_VS : + RISCV_IOMMU_FQ_CAUSE_RD_FAULT_S); +} + +static void riscv_iommu_report_fault(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx, + uint32_t fault_type, uint32_t cause, + bool pv, + uint64_t iotval, uint64_t iotval2) +{ + struct riscv_iommu_fq_record ev = { 0 }; + + if (ctx->tc & RISCV_IOMMU_DC_TC_DTF) { + switch (cause) { + case RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED: + case RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT: + case RISCV_IOMMU_FQ_CAUSE_DDT_INVALID: + case RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED: + case RISCV_IOMMU_FQ_CAUSE_DDT_CORRUPTED: + case RISCV_IOMMU_FQ_CAUSE_INTERNAL_DP_ERROR: + case RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT: + break; + default: + /* DTF prevents reporting a fault for this given cause */ + return; + } + } + + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_CAUSE, cause); + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_TTYPE, fault_type); + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_DID, ctx->devid); + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PV, true); + + if (pv) { + ev.hdr = set_field(ev.hdr, RISCV_IOMMU_FQ_HDR_PID, ctx->process_id); + } + + ev.iotval = iotval; + ev.iotval2 = iotval2; + + riscv_iommu_fault(s, &ev); +} + +/* Redirect MSI write for given GPA. */ +static MemTxResult riscv_iommu_msi_write(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx, uint64_t gpa, uint64_t data, + unsigned size, MemTxAttrs attrs) +{ + MemTxResult res; + dma_addr_t addr; + uint64_t intn; + uint32_t n190; + uint64_t pte[2]; + int fault_type = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; + int cause; + + /* Interrupt File Number */ + intn = _pext_u64(PPN_DOWN(gpa), ctx->msi_addr_mask); + if (intn >= 256) { + /* Interrupt file number out of range */ + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + goto err; + } + + /* fetch MSI PTE */ + addr = PPN_PHYS(get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_PPN)); + addr = addr | (intn * sizeof(pte)); + res = dma_memory_read(s->target_as, addr, &pte, sizeof(pte), + MEMTXATTRS_UNSPECIFIED); + if (res != MEMTX_OK) { + if (res == MEMTX_DECODE_ERROR) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_PT_CORRUPTED; + } else { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + } + goto err; + } + + le64_to_cpus(&pte[0]); + le64_to_cpus(&pte[1]); + + if (!(pte[0] & RISCV_IOMMU_MSI_PTE_V) || (pte[0] & RISCV_IOMMU_MSI_PTE_C)) { + /* + * The spec mentions that: "If msipte.C == 1, then further + * processing to interpret the PTE is implementation + * defined.". We'll abort with cause = 262 for this + * case too. + */ + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_INVALID; + goto err; + } + + switch (get_field(pte[0], RISCV_IOMMU_MSI_PTE_M)) { + case RISCV_IOMMU_MSI_PTE_M_BASIC: + /* MSI Pass-through mode */ + addr = PPN_PHYS(get_field(pte[0], RISCV_IOMMU_MSI_PTE_PPN)); + + trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), + PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), + gpa, addr); + + res = dma_memory_write(s->target_as, addr, &data, size, attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; + goto err; + } + + return MEMTX_OK; + case RISCV_IOMMU_MSI_PTE_M_MRIF: + /* MRIF mode, continue. */ + break; + default: + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; + goto err; + } + + /* + * Report an error for interrupt identities exceeding the maximum allowed + * for an IMSIC interrupt file (2047) or destination address is not 32-bit + * aligned. See IOMMU Specification, Chapter 2.3. MSI page tables. 
+ */ + if ((data > 2047) || (gpa & 3)) { + res = MEMTX_ACCESS_ERROR; + cause = RISCV_IOMMU_FQ_CAUSE_MSI_MISCONFIGURED; + goto err; + } + + /* MSI MRIF mode, non atomic pending bit update */ + + /* MRIF pending bit address */ + addr = get_field(pte[0], RISCV_IOMMU_MSI_PTE_MRIF_ADDR) << 9; + addr = addr | ((data & 0x7c0) >> 3); + + trace_riscv_iommu_msi(s->parent_obj.id, PCI_BUS_NUM(ctx->devid), + PCI_SLOT(ctx->devid), PCI_FUNC(ctx->devid), + gpa, addr); + + /* MRIF pending bit mask */ + data = 1ULL << (data & 0x03f); + res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + goto err; + } + + intn = intn | data; + res = dma_memory_write(s->target_as, addr, &intn, sizeof(intn), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; + goto err; + } + + /* Get MRIF enable bits */ + addr = addr + sizeof(intn); + res = dma_memory_read(s->target_as, addr, &intn, sizeof(intn), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_LOAD_FAULT; + goto err; + } + + if (!(intn & data)) { + /* notification disabled, MRIF update completed. */ + return MEMTX_OK; + } + + /* Send notification message */ + addr = PPN_PHYS(get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NPPN)); + n190 = get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID) | + (get_field(pte[1], RISCV_IOMMU_MSI_MRIF_NID_MSB) << 10); + + res = dma_memory_write(s->target_as, addr, &n190, sizeof(n190), attrs); + if (res != MEMTX_OK) { + cause = RISCV_IOMMU_FQ_CAUSE_MSI_WR_FAULT; + goto err; + } + + trace_riscv_iommu_mrif_notification(s->parent_obj.id, n190, addr); + + return MEMTX_OK; + +err: + riscv_iommu_report_fault(s, ctx, fault_type, cause, + !!ctx->process_id, 0, 0); + return res; +} + +/* + * Check device context configuration as described by the + * riscv-iommu spec section "Device-context configuration + * checks". 
+ */ +static bool riscv_iommu_validate_device_ctx(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx) +{ + uint32_t fsc_mode, msi_mode; + uint64_t gatp; + + if (!(s->cap & RISCV_IOMMU_CAP_ATS) && + (ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS || + ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI || + ctx->tc & RISCV_IOMMU_DC_TC_PRPR)) { + return false; + } + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS) && + (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA || + ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI)) { + return false; + } + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_PRI) && + ctx->tc & RISCV_IOMMU_DC_TC_PRPR) { + return false; + } + + if (!(s->cap & RISCV_IOMMU_CAP_T2GPA) && + ctx->tc & RISCV_IOMMU_DC_TC_T2GPA) { + return false; + } + + if (s->cap & RISCV_IOMMU_CAP_MSI_FLAT) { + msi_mode = get_field(ctx->msiptp, RISCV_IOMMU_DC_MSIPTP_MODE); + + if (msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_OFF && + msi_mode != RISCV_IOMMU_DC_MSIPTP_MODE_FLAT) { + return false; + } + } + + gatp = get_field(ctx->gatp, RISCV_IOMMU_ATP_MODE_FIELD); + if (ctx->tc & RISCV_IOMMU_DC_TC_T2GPA && + gatp == RISCV_IOMMU_DC_IOHGATP_MODE_BARE) { + return false; + } + + fsc_mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); + + if (ctx->tc & RISCV_IOMMU_DC_TC_PDTV) { + switch (fsc_mode) { + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8: + if (!(s->cap & RISCV_IOMMU_CAP_PD8)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD17: + if (!(s->cap & RISCV_IOMMU_CAP_PD17)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20: + if (!(s->cap & RISCV_IOMMU_CAP_PD20)) { + return false; + } + break; + } + } else { + /* DC.tc.PDTV is 0 */ + if (ctx->tc & RISCV_IOMMU_DC_TC_DPE) { + return false; + } + + if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { + if (fsc_mode == RISCV_IOMMU_CAP_SV32 && + !(s->cap & RISCV_IOMMU_CAP_SV32)) { + return false; + } + } else { + switch (fsc_mode) { + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: + if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: + if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: + if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { + return false; + } + break; + } + } + } + + /* + * CAP_END is always zero (only one endianness). FCTL_BE is + * always zero (little-endian accesses). Thus TC_SBE must + * always be LE, i.e. zero. + */ + if (ctx->tc & RISCV_IOMMU_DC_TC_SBE) { + return false; + } + + return true; +} + +/* + * Validate process context (PC) according to section + * "Process-context configuration checks".
+ */ +static bool riscv_iommu_validate_process_ctx(RISCVIOMMUState *s, + RISCVIOMMUContext *ctx) +{ + uint32_t mode; + + if (get_field(ctx->ta, RISCV_IOMMU_PC_TA_RESERVED)) { + return false; + } + + if (get_field(ctx->satp, RISCV_IOMMU_PC_FSC_RESERVED)) { + return false; + } + + mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); + switch (mode) { + case RISCV_IOMMU_DC_FSC_MODE_BARE: + /* sv39 and sv32 modes have the same value (8) */ + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: + break; + default: + return false; + } + + if (ctx->tc & RISCV_IOMMU_DC_TC_SXL) { + if (mode == RISCV_IOMMU_CAP_SV32 && + !(s->cap & RISCV_IOMMU_CAP_SV32)) { + return false; + } + } else { + switch (mode) { + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39: + if (!(s->cap & RISCV_IOMMU_CAP_SV39)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48: + if (!(s->cap & RISCV_IOMMU_CAP_SV48)) { + return false; + } + break; + case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57: + if (!(s->cap & RISCV_IOMMU_CAP_SV57)) { + return false; + } + break; + } + } + + return true; +} + +/* + * RISC-V IOMMU Device Context Lookup - Device Directory Tree Walk + * + * @s : IOMMU Device State + * @ctx : Device Translation Context with devid and process_id set. + * @return : success or fault code. + */ +static int riscv_iommu_ctx_fetch(RISCVIOMMUState *s, RISCVIOMMUContext *ctx) +{ + const uint64_t ddtp = s->ddtp; + unsigned mode = get_field(ddtp, RISCV_IOMMU_DDTP_MODE); + dma_addr_t addr = PPN_PHYS(get_field(ddtp, RISCV_IOMMU_DDTP_PPN)); + struct riscv_iommu_dc dc; + /* Device Context format: 0: extended (64 bytes) | 1: base (32 bytes) */ + const int dc_fmt = !s->enable_msi; + const size_t dc_len = sizeof(dc) >> dc_fmt; + unsigned depth; + uint64_t de; + + switch (mode) { + case RISCV_IOMMU_DDTP_MODE_OFF: + return RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED; + + case RISCV_IOMMU_DDTP_MODE_BARE: + /* mock up pass-through translation context */ + ctx->gatp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, + RISCV_IOMMU_DC_IOHGATP_MODE_BARE); + ctx->satp = set_field(0, RISCV_IOMMU_ATP_MODE_FIELD, + RISCV_IOMMU_DC_FSC_MODE_BARE); + + ctx->tc = RISCV_IOMMU_DC_TC_V; + if (s->enable_ats) { + ctx->tc |= RISCV_IOMMU_DC_TC_EN_ATS; + } + + ctx->ta = 0; + ctx->msiptp = 0; + return 0; + + case RISCV_IOMMU_DDTP_MODE_1LVL: + depth = 0; + break; + + case RISCV_IOMMU_DDTP_MODE_2LVL: + depth = 1; + break; + + case RISCV_IOMMU_DDTP_MODE_3LVL: + depth = 2; + break; + + default: + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + + /* + * Check supported device id width (in bits). + * See IOMMU Specification, Chapter 6. Software guidelines. + * - if extended device-context format is used: + * 1LVL: 6, 2LVL: 15, 3LVL: 24 + * - if base device-context format is used: + * 1LVL: 7, 2LVL: 16, 3LVL: 24 + */ + if (ctx->devid >= (1 << (depth * 9 + 6 + (dc_fmt && depth != 2)))) { + return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; + } + + /* Device directory tree walk */ + for (; depth-- > 0; ) { + /* + * Select device id index bits based on device directory tree level + * and device context format. + * See IOMMU Specification, Chapter 2. Data Structures.
+ * - if extended device-context format is used: + * device index: [23:15][14:6][5:0] + * - if base device-context format is used: + * device index: [23:16][15:7][6:0] + */ + const int split = depth * 9 + 6 + dc_fmt; + addr |= ((ctx->devid >> split) << 3) & ~TARGET_PAGE_MASK; + if (dma_memory_read(s->target_as, addr, &de, sizeof(de), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; + } + le64_to_cpus(&de); + if (!(de & RISCV_IOMMU_DDTE_VALID)) { + /* invalid directory entry */ + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; + } + if (de & ~(RISCV_IOMMU_DDTE_PPN | RISCV_IOMMU_DDTE_VALID)) { + /* reserved bits set */ + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + addr = PPN_PHYS(get_field(de, RISCV_IOMMU_DDTE_PPN)); + } + + /* index into device context entry page */ + addr |= (ctx->devid * dc_len) & ~TARGET_PAGE_MASK; + + memset(&dc, 0, sizeof(dc)); + if (dma_memory_read(s->target_as, addr, &dc, dc_len, + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_DDT_LOAD_FAULT; + } + + /* Set translation context. */ + ctx->tc = le64_to_cpu(dc.tc); + ctx->gatp = le64_to_cpu(dc.iohgatp); + ctx->satp = le64_to_cpu(dc.fsc); + ctx->ta = le64_to_cpu(dc.ta); + ctx->msiptp = le64_to_cpu(dc.msiptp); + ctx->msi_addr_mask = le64_to_cpu(dc.msi_addr_mask); + ctx->msi_addr_pattern = le64_to_cpu(dc.msi_addr_pattern); + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_V)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; + } + + if (!riscv_iommu_validate_device_ctx(s, ctx)) { + return RISCV_IOMMU_FQ_CAUSE_DDT_MISCONFIGURED; + } + + /* FSC field checks */ + mode = get_field(ctx->satp, RISCV_IOMMU_DC_FSC_MODE); + addr = PPN_PHYS(get_field(ctx->satp, RISCV_IOMMU_DC_FSC_PPN)); + + if (!(ctx->tc & RISCV_IOMMU_DC_TC_PDTV)) { + if (ctx->process_id != RISCV_IOMMU_NOPROCID) { + /* PID is disabled */ + return RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; + } + if (mode > RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57) { + /* Invalid translation mode */ + return RISCV_IOMMU_FQ_CAUSE_DDT_INVALID; + } + return 0; + } + + if (ctx->process_id == RISCV_IOMMU_NOPROCID) { + if (!(ctx->tc & RISCV_IOMMU_DC_TC_DPE)) { + /* No default process_id enabled, set BARE mode */ + ctx->satp = 0ULL; + return 0; + } else { + /* Use default process_id #0 */ + ctx->process_id = 0; + } + } + + if (mode == RISCV_IOMMU_DC_FSC_MODE_BARE) { + /* No S-Stage translation, done. */ + return 0; + } + + /* FSC.TC.PDTV enabled */ + if (mode > RISCV_IOMMU_DC_FSC_PDTP_MODE_PD20) { + /* Invalid PDTP.MODE */ + return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; + } + + for (depth = mode - RISCV_IOMMU_DC_FSC_PDTP_MODE_PD8; depth-- > 0; ) { + /* + * Select process id index bits based on process directory tree + * level. See IOMMU Specification, 2.2. Process-Directory-Table. + */ + const int split = depth * 9 + 8; + addr |= ((ctx->process_id >> split) << 3) & ~TARGET_PAGE_MASK; + if (dma_memory_read(s->target_as, addr, &de, sizeof(de), + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; + } + le64_to_cpus(&de); + if (!(de & RISCV_IOMMU_PC_TA_V)) { + return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; + } + addr = PPN_PHYS(get_field(de, RISCV_IOMMU_PC_FSC_PPN)); + } + + /* Leaf entry in PDT */ + addr |= (ctx->process_id << 4) & ~TARGET_PAGE_MASK; + if (dma_memory_read(s->target_as, addr, &dc.ta, sizeof(uint64_t) * 2, + MEMTXATTRS_UNSPECIFIED) != MEMTX_OK) { + return RISCV_IOMMU_FQ_CAUSE_PDT_LOAD_FAULT; + } + + /* Use FSC and TA from process directory entry. 
*/ + ctx->ta = le64_to_cpu(dc.ta); + ctx->satp = le64_to_cpu(dc.fsc); + + if (!(ctx->ta & RISCV_IOMMU_PC_TA_V)) { + return RISCV_IOMMU_FQ_CAUSE_PDT_INVALID; + } + + if (!riscv_iommu_validate_process_ctx(s, ctx)) { + return RISCV_IOMMU_FQ_CAUSE_PDT_MISCONFIGURED; + } + + return 0; +} + +/* Translation Context cache support */ +static gboolean riscv_iommu_ctx_equal(gconstpointer v1, gconstpointer v2) +{ + RISCVIOMMUContext *c1 = (RISCVIOMMUContext *) v1; + RISCVIOMMUContext *c2 = (RISCVIOMMUContext *) v2; + return c1->devid == c2->devid && + c1->process_id == c2->process_id; +} + +static guint riscv_iommu_ctx_hash(gconstpointer v) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) v; + /* + * Generate simple hash of (process_id, devid) + * assuming 24-bit wide devid. + */ + return (guint)(ctx->devid) + ((guint)(ctx->process_id) << 24); +} + +static void riscv_iommu_ctx_inval_devid_procid(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; + RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; + if (ctx->tc & RISCV_IOMMU_DC_TC_V && + ctx->devid == arg->devid && + ctx->process_id == arg->process_id) { + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; + } +} + +static void riscv_iommu_ctx_inval_devid(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; + RISCVIOMMUContext *arg = (RISCVIOMMUContext *) data; + if (ctx->tc & RISCV_IOMMU_DC_TC_V && + ctx->devid == arg->devid) { + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; + } +} + +static void riscv_iommu_ctx_inval_all(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUContext *ctx = (RISCVIOMMUContext *) value; + if (ctx->tc & RISCV_IOMMU_DC_TC_V) { + ctx->tc &= ~RISCV_IOMMU_DC_TC_V; + } +} + +static void riscv_iommu_ctx_inval(RISCVIOMMUState *s, GHFunc func, + uint32_t devid, uint32_t process_id) +{ + GHashTable *ctx_cache; + RISCVIOMMUContext key = { + .devid = devid, + .process_id = process_id, + }; + ctx_cache = g_hash_table_ref(s->ctx_cache); + g_hash_table_foreach(ctx_cache, func, &key); + g_hash_table_unref(ctx_cache); +} + +/* Find or allocate translation context for a given {device_id, process_id} */ +static RISCVIOMMUContext *riscv_iommu_ctx(RISCVIOMMUState *s, + unsigned devid, unsigned process_id, + void **ref) +{ + GHashTable *ctx_cache; + RISCVIOMMUContext *ctx; + RISCVIOMMUContext key = { + .devid = devid, + .process_id = process_id, + }; + + ctx_cache = g_hash_table_ref(s->ctx_cache); + ctx = g_hash_table_lookup(ctx_cache, &key); + + if (ctx && (ctx->tc & RISCV_IOMMU_DC_TC_V)) { + *ref = ctx_cache; + return ctx; + } + + ctx = g_new0(RISCVIOMMUContext, 1); + ctx->devid = devid; + ctx->process_id = process_id; + + int fault = riscv_iommu_ctx_fetch(s, ctx); + if (!fault) { + if (g_hash_table_size(ctx_cache) >= LIMIT_CACHE_CTX) { + g_hash_table_unref(ctx_cache); + ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, + riscv_iommu_ctx_equal, + g_free, NULL); + g_hash_table_ref(ctx_cache); + g_hash_table_unref(qatomic_xchg(&s->ctx_cache, ctx_cache)); + } + g_hash_table_add(ctx_cache, ctx); + *ref = ctx_cache; + return ctx; + } + + g_hash_table_unref(ctx_cache); + *ref = NULL; + + riscv_iommu_report_fault(s, ctx, RISCV_IOMMU_FQ_TTYPE_UADDR_RD, + fault, !!process_id, 0, 0); + + g_free(ctx); + return NULL; +} + +static void riscv_iommu_ctx_put(RISCVIOMMUState *s, void *ref) +{ + if (ref) { + g_hash_table_unref((GHashTable *)ref); + } +} + +/* Find or allocate address space for a given device */ +static AddressSpace 
*riscv_iommu_space(RISCVIOMMUState *s, uint32_t devid) +{ + RISCVIOMMUSpace *as; + + /* FIXME: PCIe bus remapping for attached endpoints. */ + devid |= s->bus << 8; + + QLIST_FOREACH(as, &s->spaces, list) { + if (as->devid == devid) { + break; + } + } + + if (as == NULL) { + char name[64]; + as = g_new0(RISCVIOMMUSpace, 1); + + as->iommu = s; + as->devid = devid; + + snprintf(name, sizeof(name), "riscv-iommu-%04x:%02x.%d-iova", + PCI_BUS_NUM(as->devid), PCI_SLOT(as->devid), PCI_FUNC(as->devid)); + + /* IOVA address space, untranslated addresses */ + memory_region_init_iommu(&as->iova_mr, sizeof(as->iova_mr), + TYPE_RISCV_IOMMU_MEMORY_REGION, + OBJECT(as), "riscv_iommu", UINT64_MAX); + address_space_init(&as->iova_as, MEMORY_REGION(&as->iova_mr), name); + + QLIST_INSERT_HEAD(&s->spaces, as, list); + + trace_riscv_iommu_new(s->parent_obj.id, PCI_BUS_NUM(as->devid), + PCI_SLOT(as->devid), PCI_FUNC(as->devid)); + } + return &as->iova_as; +} + +/* Translation Object cache support */ +static gboolean riscv_iommu_iot_equal(gconstpointer v1, gconstpointer v2) +{ + RISCVIOMMUEntry *t1 = (RISCVIOMMUEntry *) v1; + RISCVIOMMUEntry *t2 = (RISCVIOMMUEntry *) v2; + return t1->gscid == t2->gscid && t1->pscid == t2->pscid && + t1->iova == t2->iova; +} + +static guint riscv_iommu_iot_hash(gconstpointer v) +{ + RISCVIOMMUEntry *t = (RISCVIOMMUEntry *) v; + return (guint)t->iova; +} + +/* GV: 1 PSCV: 1 AV: 1 */ +static void riscv_iommu_iot_inval_pscid_iova(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->gscid == arg->gscid && + iot->pscid == arg->pscid && + iot->iova == arg->iova) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 1 PSCV: 1 AV: 0 */ +static void riscv_iommu_iot_inval_pscid(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->gscid == arg->gscid && + iot->pscid == arg->pscid) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 1 GVMA: 1 */ +static void riscv_iommu_iot_inval_gscid_gpa(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->gscid == arg->gscid) { + /* simplified cache, no GPA matching */ + iot->perm = IOMMU_NONE; + } +} + +/* GV: 1 GVMA: 0 */ +static void riscv_iommu_iot_inval_gscid(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + RISCVIOMMUEntry *arg = (RISCVIOMMUEntry *) data; + if (iot->gscid == arg->gscid) { + iot->perm = IOMMU_NONE; + } +} + +/* GV: 0 */ +static void riscv_iommu_iot_inval_all(gpointer key, gpointer value, + gpointer data) +{ + RISCVIOMMUEntry *iot = (RISCVIOMMUEntry *) value; + iot->perm = IOMMU_NONE; +} + +/* caller should keep ref-count for iot_cache object */ +static RISCVIOMMUEntry *riscv_iommu_iot_lookup(RISCVIOMMUContext *ctx, + GHashTable *iot_cache, hwaddr iova) +{ + RISCVIOMMUEntry key = { + .gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID), + .pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID), + .iova = PPN_DOWN(iova), + }; + return g_hash_table_lookup(iot_cache, &key); +} + +/* caller should keep ref-count for iot_cache object */ +static void riscv_iommu_iot_update(RISCVIOMMUState *s, + GHashTable *iot_cache, RISCVIOMMUEntry *iot) +{ + if (!s->iot_limit) { + return; + } + + if (g_hash_table_size(s->iot_cache) >= s->iot_limit) { + iot_cache = 
g_hash_table_new_full(riscv_iommu_iot_hash, + riscv_iommu_iot_equal, + g_free, NULL); + g_hash_table_unref(qatomic_xchg(&s->iot_cache, iot_cache)); + } + g_hash_table_add(iot_cache, iot); +} + +static void riscv_iommu_iot_inval(RISCVIOMMUState *s, GHFunc func, + uint32_t gscid, uint32_t pscid, hwaddr iova) +{ + GHashTable *iot_cache; + RISCVIOMMUEntry key = { + .gscid = gscid, + .pscid = pscid, + .iova = PPN_DOWN(iova), + }; + + iot_cache = g_hash_table_ref(s->iot_cache); + g_hash_table_foreach(iot_cache, func, &key); + g_hash_table_unref(iot_cache); +} + +static int riscv_iommu_translate(RISCVIOMMUState *s, RISCVIOMMUContext *ctx, + IOMMUTLBEntry *iotlb, bool enable_cache) +{ + RISCVIOMMUEntry *iot; + IOMMUAccessFlags perm; + bool enable_pid; + bool enable_pri; + GHashTable *iot_cache; + int fault; + + iot_cache = g_hash_table_ref(s->iot_cache); + /* + * TC[32] is reserved for custom extensions, used here to temporarily + * enable automatic page-request generation for ATS queries. + */ + enable_pri = (iotlb->perm == IOMMU_NONE) && (ctx->tc & BIT_ULL(32)); + enable_pid = (ctx->tc & RISCV_IOMMU_DC_TC_PDTV); + + /* Check for ATS request. */ + if (iotlb->perm == IOMMU_NONE) { + /* Check if ATS is disabled. */ + if (!(ctx->tc & RISCV_IOMMU_DC_TC_EN_ATS)) { + enable_pri = false; + fault = RISCV_IOMMU_FQ_CAUSE_TTYPE_BLOCKED; + goto done; + } + } + + iot = riscv_iommu_iot_lookup(ctx, iot_cache, iotlb->iova); + perm = iot ? iot->perm : IOMMU_NONE; + if (perm != IOMMU_NONE) { + iotlb->translated_addr = PPN_PHYS(iot->phys); + iotlb->addr_mask = ~TARGET_PAGE_MASK; + iotlb->perm = perm; + fault = 0; + goto done; + } + + /* Translate using device directory / page table information. */ + fault = riscv_iommu_spa_fetch(s, ctx, iotlb); + + if (!fault && iotlb->target_as == &s->trap_as) { + /* Do not cache trapped MSI translations */ + goto done; + } + + /* + * We made an implementation choice to not cache identity-mapped + * translations, as allowed by the specification, to avoid + * translation cache evictions for other devices sharing the + * IOMMU hardware model. + */ + if (!fault && iotlb->translated_addr != iotlb->iova && enable_cache) { + iot = g_new0(RISCVIOMMUEntry, 1); + iot->iova = PPN_DOWN(iotlb->iova); + iot->phys = PPN_DOWN(iotlb->translated_addr); + iot->gscid = get_field(ctx->gatp, RISCV_IOMMU_DC_IOHGATP_GSCID); + iot->pscid = get_field(ctx->ta, RISCV_IOMMU_DC_TA_PSCID); + iot->perm = iotlb->perm; + riscv_iommu_iot_update(s, iot_cache, iot); + } + +done: + g_hash_table_unref(iot_cache); + + if (enable_pri && fault) { + struct riscv_iommu_pq_record pr = {0}; + if (enable_pid) { + pr.hdr = set_field(RISCV_IOMMU_PREQ_HDR_PV, + RISCV_IOMMU_PREQ_HDR_PID, ctx->process_id); + } + pr.hdr = set_field(pr.hdr, RISCV_IOMMU_PREQ_HDR_DID, ctx->devid); + pr.payload = (iotlb->iova & TARGET_PAGE_MASK) | + RISCV_IOMMU_PREQ_PAYLOAD_M; + riscv_iommu_pri(s, &pr); + return fault; + } + + if (fault) { + unsigned ttype = RISCV_IOMMU_FQ_TTYPE_PCIE_ATS_REQ; + + if (iotlb->perm & IOMMU_RW) { + ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_WR; + } else if (iotlb->perm & IOMMU_RO) { + ttype = RISCV_IOMMU_FQ_TTYPE_UADDR_RD; + } + + riscv_iommu_report_fault(s, ctx, ttype, fault, enable_pid, + iotlb->iova, iotlb->translated_addr); + return fault; + } + + return 0; +} + +/* IOMMU Command Interface */ +static MemTxResult riscv_iommu_iofence(RISCVIOMMUState *s, bool notify, + uint64_t addr, uint32_t data) +{ + /* + * ATS processing in this implementation of the IOMMU is synchronous, + * no need to wait for completions here. 
+ */ + if (!notify) { + return MEMTX_OK; + } + + return dma_memory_write(s->target_as, addr, &data, sizeof(data), + MEMTXATTRS_UNSPECIFIED); +} + +static void riscv_iommu_ats(RISCVIOMMUState *s, + struct riscv_iommu_command *cmd, IOMMUNotifierFlag flag, + IOMMUAccessFlags perm, + void (*trace_fn)(const char *id)) +{ + RISCVIOMMUSpace *as = NULL; + IOMMUNotifier *n; + IOMMUTLBEvent event; + uint32_t pid; + uint32_t devid; + const bool pv = cmd->dword0 & RISCV_IOMMU_CMD_ATS_PV; + + if (cmd->dword0 & RISCV_IOMMU_CMD_ATS_DSV) { + /* Use device segment and requester id */ + devid = get_field(cmd->dword0, + RISCV_IOMMU_CMD_ATS_DSEG | RISCV_IOMMU_CMD_ATS_RID); + } else { + devid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_RID); + } + + pid = get_field(cmd->dword0, RISCV_IOMMU_CMD_ATS_PID); + + QLIST_FOREACH(as, &s->spaces, list) { + if (as->devid == devid) { + break; + } + } + + if (!as || !as->notifier) { + return; + } + + event.type = flag; + event.entry.perm = perm; + event.entry.target_as = s->target_as; + + IOMMU_NOTIFIER_FOREACH(n, &as->iova_mr) { + if (!pv || n->iommu_idx == pid) { + event.entry.iova = n->start; + event.entry.addr_mask = n->end - n->start; + trace_fn(as->iova_mr.parent_obj.name); + memory_region_notify_iommu_one(n, &event); + } + } +} + +static void riscv_iommu_ats_inval(RISCVIOMMUState *s, + struct riscv_iommu_command *cmd) +{ + return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_DEVIOTLB_UNMAP, IOMMU_NONE, + trace_riscv_iommu_ats_inval); +} + +static void riscv_iommu_ats_prgr(RISCVIOMMUState *s, + struct riscv_iommu_command *cmd) +{ + unsigned resp_code = get_field(cmd->dword1, + RISCV_IOMMU_CMD_ATS_PRGR_RESP_CODE); + + /* Using the access flag to carry response code information */ + IOMMUAccessFlags perm = resp_code ? IOMMU_NONE : IOMMU_RW; + return riscv_iommu_ats(s, cmd, IOMMU_NOTIFIER_MAP, perm, + trace_riscv_iommu_ats_prgr); +} + +static void riscv_iommu_process_ddtp(RISCVIOMMUState *s) +{ + uint64_t old_ddtp = s->ddtp; + uint64_t new_ddtp = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_DDTP); + unsigned new_mode = get_field(new_ddtp, RISCV_IOMMU_DDTP_MODE); + unsigned old_mode = get_field(old_ddtp, RISCV_IOMMU_DDTP_MODE); + bool ok = false; + + /* + * Check for allowed DDTP.MODE transitions: + * {OFF, BARE} -> {OFF, BARE, 1LVL, 2LVL, 3LVL} + * {1LVL, 2LVL, 3LVL} -> {OFF, BARE} + */ + if (new_mode == old_mode || + new_mode == RISCV_IOMMU_DDTP_MODE_OFF || + new_mode == RISCV_IOMMU_DDTP_MODE_BARE) { + ok = true; + } else if (new_mode == RISCV_IOMMU_DDTP_MODE_1LVL || + new_mode == RISCV_IOMMU_DDTP_MODE_2LVL || + new_mode == RISCV_IOMMU_DDTP_MODE_3LVL) { + ok = old_mode == RISCV_IOMMU_DDTP_MODE_OFF || + old_mode == RISCV_IOMMU_DDTP_MODE_BARE; + } + + if (ok) { + /* clear reserved and busy bits, report back sanitized version */ + new_ddtp = set_field(new_ddtp & RISCV_IOMMU_DDTP_PPN, + RISCV_IOMMU_DDTP_MODE, new_mode); + } else { + new_ddtp = old_ddtp; + } + s->ddtp = new_ddtp; + + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_DDTP, new_ddtp); +} + +/* Command function and opcode field. 
*/ +#define RISCV_IOMMU_CMD(func, op) (((func) << 7) | (op)) + +static void riscv_iommu_process_cq_tail(RISCVIOMMUState *s) +{ + struct riscv_iommu_command cmd; + MemTxResult res; + dma_addr_t addr; + uint32_t tail, head, ctrl; + uint64_t cmd_opcode; + GHFunc func; + + ctrl = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); + tail = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQT) & s->cq_mask; + head = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQH) & s->cq_mask; + + /* Check for pending error or queue processing disabled */ + if (!(ctrl & RISCV_IOMMU_CQCSR_CQON) || + !!(ctrl & (RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CQMF))) { + return; + } + + while (tail != head) { + addr = s->cq_addr + head * sizeof(cmd); + res = dma_memory_read(s->target_as, addr, &cmd, sizeof(cmd), + MEMTXATTRS_UNSPECIFIED); + + if (res != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, + RISCV_IOMMU_CQCSR_CQMF, 0); + goto fault; + } + + trace_riscv_iommu_cmd(s->parent_obj.id, cmd.dword0, cmd.dword1); + + cmd_opcode = get_field(cmd.dword0, + RISCV_IOMMU_CMD_OPCODE | RISCV_IOMMU_CMD_FUNC); + + switch (cmd_opcode) { + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOFENCE_FUNC_C, + RISCV_IOMMU_CMD_IOFENCE_OPCODE): + res = riscv_iommu_iofence(s, + cmd.dword0 & RISCV_IOMMU_CMD_IOFENCE_AV, cmd.dword1 << 2, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IOFENCE_DATA)); + + if (res != MEMTX_OK) { + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, + RISCV_IOMMU_CQCSR_CQMF, 0); + goto fault; + } + break; + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_GVMA, + RISCV_IOMMU_CMD_IOTINVAL_OPCODE): + if (cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV) { + /* illegal command arguments IOTINVAL.GVMA & PSCV == 1 */ + goto cmd_ill; + } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) { + /* invalidate all cache mappings */ + func = riscv_iommu_iot_inval_all; + } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) { + /* invalidate cache matching GSCID */ + func = riscv_iommu_iot_inval_gscid; + } else { + /* invalidate cache matching GSCID and ADDR (GPA) */ + func = riscv_iommu_iot_inval_gscid_gpa; + } + riscv_iommu_iot_inval(s, func, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), 0, + cmd.dword1 << 2 & TARGET_PAGE_MASK); + break; + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IOTINVAL_FUNC_VMA, + RISCV_IOMMU_CMD_IOTINVAL_OPCODE): + if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_GV)) { + /* invalidate all cache mappings, simplified model */ + func = riscv_iommu_iot_inval_all; + } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_PSCV)) { + /* invalidate cache matching GSCID, simplified model */ + func = riscv_iommu_iot_inval_gscid; + } else if (!(cmd.dword0 & RISCV_IOMMU_CMD_IOTINVAL_AV)) { + /* invalidate cache matching GSCID and PSCID */ + func = riscv_iommu_iot_inval_pscid; + } else { + /* invalidate cache matching GSCID and PSCID and ADDR (IOVA) */ + func = riscv_iommu_iot_inval_pscid_iova; + } + riscv_iommu_iot_inval(s, func, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_GSCID), + get_field(cmd.dword0, RISCV_IOMMU_CMD_IOTINVAL_PSCID), + cmd.dword1 << 2 & TARGET_PAGE_MASK); + break; + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_DDT, + RISCV_IOMMU_CMD_IODIR_OPCODE): + if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { + /* invalidate all device context cache mappings */ + func = riscv_iommu_ctx_inval_all; + } else { + /* invalidate all device context matching DID */ + func = riscv_iommu_ctx_inval_devid; + } + riscv_iommu_ctx_inval(s, func, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), 0); + break; + + case 
RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_IODIR_FUNC_INVAL_PDT, + RISCV_IOMMU_CMD_IODIR_OPCODE): + if (!(cmd.dword0 & RISCV_IOMMU_CMD_IODIR_DV)) { + /* illegal command arguments IODIR_PDT & DV == 0 */ + goto cmd_ill; + } else { + func = riscv_iommu_ctx_inval_devid_procid; + } + riscv_iommu_ctx_inval(s, func, + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_DID), + get_field(cmd.dword0, RISCV_IOMMU_CMD_IODIR_PID)); + break; + + /* ATS commands */ + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_INVAL, + RISCV_IOMMU_CMD_ATS_OPCODE): + if (!s->enable_ats) { + goto cmd_ill; + } + + riscv_iommu_ats_inval(s, &cmd); + break; + + case RISCV_IOMMU_CMD(RISCV_IOMMU_CMD_ATS_FUNC_PRGR, + RISCV_IOMMU_CMD_ATS_OPCODE): + if (!s->enable_ats) { + goto cmd_ill; + } + + riscv_iommu_ats_prgr(s, &cmd); + break; + + default: + cmd_ill: + /* Invalid instruction, do not advance instruction index. */ + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, + RISCV_IOMMU_CQCSR_CMD_ILL, 0); + goto fault; + } + + /* Advance and update head pointer after command completes. */ + head = (head + 1) & s->cq_mask; + riscv_iommu_reg_set32(s, RISCV_IOMMU_REG_CQH, head); + } + return; + +fault: + if (ctrl & RISCV_IOMMU_CQCSR_CIE) { + riscv_iommu_notify(s, RISCV_IOMMU_INTR_CQ); + } +} + +static void riscv_iommu_process_cq_control(RISCVIOMMUState *s) +{ + uint64_t base; + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); + uint32_t ctrl_clr; + bool enable = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQEN); + bool active = !!(ctrl_set & RISCV_IOMMU_CQCSR_CQON); + + if (enable && !active) { + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_CQB); + s->cq_mask = (2ULL << get_field(base, RISCV_IOMMU_CQB_LOG2SZ)) - 1; + s->cq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_CQB_PPN)); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~s->cq_mask); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQH], 0); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_CQT], 0); + ctrl_set = RISCV_IOMMU_CQCSR_CQON; + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQMF | + RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_CMD_TO | + RISCV_IOMMU_CQCSR_FENCE_W_IP; + } else if (!enable && active) { + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQT], ~0); + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY | RISCV_IOMMU_CQCSR_CQON; + } else { + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_CQCSR_BUSY; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_CQCSR, ctrl_set, ctrl_clr); +} + +static void riscv_iommu_process_fq_control(RISCVIOMMUState *s) +{ + uint64_t base; + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); + uint32_t ctrl_clr; + bool enable = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQEN); + bool active = !!(ctrl_set & RISCV_IOMMU_FQCSR_FQON); + + if (enable && !active) { + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_FQB); + s->fq_mask = (2ULL << get_field(base, RISCV_IOMMU_FQB_LOG2SZ)) - 1; + s->fq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_FQB_PPN)); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~s->fq_mask); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQH], 0); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_FQT], 0); + ctrl_set = RISCV_IOMMU_FQCSR_FQON; + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQMF | + RISCV_IOMMU_FQCSR_FQOF; + } else if (!enable && active) { + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQH], ~0); + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY | RISCV_IOMMU_FQCSR_FQON; + } else { + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_FQCSR_BUSY; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_FQCSR, ctrl_set, ctrl_clr); +} + +static void 
riscv_iommu_process_pq_control(RISCVIOMMUState *s) +{ + uint64_t base; + uint32_t ctrl_set = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); + uint32_t ctrl_clr; + bool enable = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQEN); + bool active = !!(ctrl_set & RISCV_IOMMU_PQCSR_PQON); + + if (enable && !active) { + base = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_PQB); + s->pq_mask = (2ULL << get_field(base, RISCV_IOMMU_PQB_LOG2SZ)) - 1; + s->pq_addr = PPN_PHYS(get_field(base, RISCV_IOMMU_PQB_PPN)); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~s->pq_mask); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQH], 0); + stl_le_p(&s->regs_rw[RISCV_IOMMU_REG_PQT], 0); + ctrl_set = RISCV_IOMMU_PQCSR_PQON; + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQMF | + RISCV_IOMMU_PQCSR_PQOF; + } else if (!enable && active) { + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQH], ~0); + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY | RISCV_IOMMU_PQCSR_PQON; + } else { + ctrl_set = 0; + ctrl_clr = RISCV_IOMMU_PQCSR_BUSY; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_PQCSR, ctrl_set, ctrl_clr); +} + +static void riscv_iommu_process_dbg(RISCVIOMMUState *s) +{ + uint64_t iova = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_IOVA); + uint64_t ctrl = riscv_iommu_reg_get64(s, RISCV_IOMMU_REG_TR_REQ_CTL); + unsigned devid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_DID); + unsigned pid = get_field(ctrl, RISCV_IOMMU_TR_REQ_CTL_PID); + RISCVIOMMUContext *ctx; + void *ref; + + if (!(ctrl & RISCV_IOMMU_TR_REQ_CTL_GO_BUSY)) { + return; + } + + ctx = riscv_iommu_ctx(s, devid, pid, &ref); + if (ctx == NULL) { + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, + RISCV_IOMMU_TR_RESPONSE_FAULT | + (RISCV_IOMMU_FQ_CAUSE_DMA_DISABLED << 10)); + } else { + IOMMUTLBEntry iotlb = { + .iova = iova, + .perm = ctrl & RISCV_IOMMU_TR_REQ_CTL_NW ? 
IOMMU_RO : IOMMU_RW, + .addr_mask = ~0, + .target_as = NULL, + }; + int fault = riscv_iommu_translate(s, ctx, &iotlb, false); + if (fault) { + iova = RISCV_IOMMU_TR_RESPONSE_FAULT | (((uint64_t) fault) << 10); + } else { + iova = iotlb.translated_addr & ~iotlb.addr_mask; + iova >>= TARGET_PAGE_BITS; + iova &= RISCV_IOMMU_TR_RESPONSE_PPN; + + /* We do not support superpages (> 4kbs) for now */ + iova &= ~RISCV_IOMMU_TR_RESPONSE_S; + } + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_TR_RESPONSE, iova); + } + + riscv_iommu_reg_mod64(s, RISCV_IOMMU_REG_TR_REQ_CTL, 0, + RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); + riscv_iommu_ctx_put(s, ref); +} + +typedef void riscv_iommu_process_fn(RISCVIOMMUState *s); + +static void riscv_iommu_update_icvec(RISCVIOMMUState *s, uint64_t data) +{ + uint64_t icvec = 0; + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_CIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_CIV); + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_FIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_FIV); + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_PMIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PMIV); + + icvec |= MIN(data & RISCV_IOMMU_ICVEC_PIV, + s->icvec_avail_vectors & RISCV_IOMMU_ICVEC_PIV); + + trace_riscv_iommu_icvec_write(data, icvec); + + riscv_iommu_reg_set64(s, RISCV_IOMMU_REG_ICVEC, icvec); +} + +static void riscv_iommu_update_ipsr(RISCVIOMMUState *s, uint64_t data) +{ + uint32_t cqcsr, fqcsr, pqcsr; + uint32_t ipsr_set = 0; + uint32_t ipsr_clr = 0; + + if (data & RISCV_IOMMU_IPSR_CIP) { + cqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_CQCSR); + + if (cqcsr & RISCV_IOMMU_CQCSR_CIE && + (cqcsr & RISCV_IOMMU_CQCSR_FENCE_W_IP || + cqcsr & RISCV_IOMMU_CQCSR_CMD_ILL || + cqcsr & RISCV_IOMMU_CQCSR_CMD_TO || + cqcsr & RISCV_IOMMU_CQCSR_CQMF)) { + ipsr_set |= RISCV_IOMMU_IPSR_CIP; + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_CIP; + } + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_CIP; + } + + if (data & RISCV_IOMMU_IPSR_FIP) { + fqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_FQCSR); + + if (fqcsr & RISCV_IOMMU_FQCSR_FIE && + (fqcsr & RISCV_IOMMU_FQCSR_FQOF || + fqcsr & RISCV_IOMMU_FQCSR_FQMF)) { + ipsr_set |= RISCV_IOMMU_IPSR_FIP; + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_FIP; + } + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_FIP; + } + + if (data & RISCV_IOMMU_IPSR_PIP) { + pqcsr = riscv_iommu_reg_get32(s, RISCV_IOMMU_REG_PQCSR); + + if (pqcsr & RISCV_IOMMU_PQCSR_PIE && + (pqcsr & RISCV_IOMMU_PQCSR_PQOF || + pqcsr & RISCV_IOMMU_PQCSR_PQMF)) { + ipsr_set |= RISCV_IOMMU_IPSR_PIP; + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_PIP; + } + } else { + ipsr_clr |= RISCV_IOMMU_IPSR_PIP; + } + + riscv_iommu_reg_mod32(s, RISCV_IOMMU_REG_IPSR, ipsr_set, ipsr_clr); +} + +/* + * Write the resulting value of 'data' for the reg specified + * by 'reg_addr', after considering read-only/read-write/write-clear + * bits, in the pointer 'dest'. + * + * The result is written in little-endian. 
+ */ +static void riscv_iommu_write_reg_val(RISCVIOMMUState *s, + void *dest, hwaddr reg_addr, + int size, uint64_t data) +{ + uint64_t ro = ldn_le_p(&s->regs_ro[reg_addr], size); + uint64_t wc = ldn_le_p(&s->regs_wc[reg_addr], size); + uint64_t rw = ldn_le_p(&s->regs_rw[reg_addr], size); + + stn_le_p(dest, size, ((rw & ro) | (data & ~ro)) & ~(data & wc)); +} + +static MemTxResult riscv_iommu_mmio_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size, + MemTxAttrs attrs) +{ + riscv_iommu_process_fn *process_fn = NULL; + RISCVIOMMUState *s = opaque; + uint32_t regb = addr & ~3; + uint32_t busy = 0; + uint64_t val = 0; + + if ((addr & (size - 1)) != 0) { + /* Unsupported MMIO alignment or access size */ + return MEMTX_ERROR; + } + + if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { + /* Unsupported MMIO access location. */ + return MEMTX_ACCESS_ERROR; + } + + /* Track actionable MMIO write. */ + switch (regb) { + case RISCV_IOMMU_REG_DDTP: + case RISCV_IOMMU_REG_DDTP + 4: + process_fn = riscv_iommu_process_ddtp; + regb = RISCV_IOMMU_REG_DDTP; + busy = RISCV_IOMMU_DDTP_BUSY; + break; + + case RISCV_IOMMU_REG_CQT: + process_fn = riscv_iommu_process_cq_tail; + break; + + case RISCV_IOMMU_REG_CQCSR: + process_fn = riscv_iommu_process_cq_control; + busy = RISCV_IOMMU_CQCSR_BUSY; + break; + + case RISCV_IOMMU_REG_FQCSR: + process_fn = riscv_iommu_process_fq_control; + busy = RISCV_IOMMU_FQCSR_BUSY; + break; + + case RISCV_IOMMU_REG_PQCSR: + process_fn = riscv_iommu_process_pq_control; + busy = RISCV_IOMMU_PQCSR_BUSY; + break; + + case RISCV_IOMMU_REG_ICVEC: + case RISCV_IOMMU_REG_IPSR: + /* + * ICVEC and IPSR have special read/write procedures. We'll + * call their respective helpers and exit. + */ + riscv_iommu_write_reg_val(s, &val, addr, size, data); + + /* + * 'val' is stored as LE. Switch to host endianness + * before using it. + */ + val = le64_to_cpu(val); + + if (regb == RISCV_IOMMU_REG_ICVEC) { + riscv_iommu_update_icvec(s, val); + } else { + riscv_iommu_update_ipsr(s, val); + } + + return MEMTX_OK; + + case RISCV_IOMMU_REG_TR_REQ_CTL: + process_fn = riscv_iommu_process_dbg; + regb = RISCV_IOMMU_REG_TR_REQ_CTL; + busy = RISCV_IOMMU_TR_REQ_CTL_GO_BUSY; + break; + + default: + break; + } + + /* + * Register updates might not be synchronized with the core logic. + * If system software updates a register while the relevant BUSY bit + * is set, the IOMMU behavior of additional writes to the register + * is UNSPECIFIED. + */ + riscv_iommu_write_reg_val(s, &s->regs_rw[addr], addr, size, data); + + /* Busy flag update, MSB 4-byte register. */ + if (busy) { + uint32_t rw = ldl_le_p(&s->regs_rw[regb]); + stl_le_p(&s->regs_rw[regb], rw | busy); + } + + if (process_fn) { + process_fn(s); + } + + return MEMTX_OK; +} + +static MemTxResult riscv_iommu_mmio_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, MemTxAttrs attrs) +{ + RISCVIOMMUState *s = opaque; + uint64_t val = -1; + uint8_t *ptr; + + if ((addr & (size - 1)) != 0) { + /* Unsupported MMIO alignment.
*/ + return MEMTX_ERROR; + } + + if (addr + size > RISCV_IOMMU_REG_MSI_CONFIG) { + return MEMTX_ACCESS_ERROR; + } + + ptr = &s->regs_rw[addr]; + val = ldn_le_p(ptr, size); + + *data = val; + + return MEMTX_OK; +} + +static const MemoryRegionOps riscv_iommu_mmio_ops = { + .read_with_attrs = riscv_iommu_mmio_read, + .write_with_attrs = riscv_iommu_mmio_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + .unaligned = false, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + } +}; + +/* + * Translations matching MSI pattern check are redirected to "riscv-iommu-trap" + * memory region as untranslated address, for additional MSI/MRIF interception + * by IOMMU interrupt remapping implementation. + * Note: Device emulation code generating an MSI is expected to provide valid + * memory transaction attributes with requester_id set. + */ +static MemTxResult riscv_iommu_trap_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size, MemTxAttrs attrs) +{ + RISCVIOMMUState* s = (RISCVIOMMUState *)opaque; + RISCVIOMMUContext *ctx; + MemTxResult res; + void *ref; + uint32_t devid = attrs.requester_id; + + if (attrs.unspecified) { + return MEMTX_ACCESS_ERROR; + } + + /* FIXME: PCIe bus remapping for attached endpoints. */ + devid |= s->bus << 8; + + ctx = riscv_iommu_ctx(s, devid, 0, &ref); + if (ctx == NULL) { + res = MEMTX_ACCESS_ERROR; + } else { + res = riscv_iommu_msi_write(s, ctx, addr, data, size, attrs); + } + riscv_iommu_ctx_put(s, ref); + return res; +} + +static MemTxResult riscv_iommu_trap_read(void *opaque, hwaddr addr, + uint64_t *data, unsigned size, MemTxAttrs attrs) +{ + return MEMTX_ACCESS_ERROR; +} + +static const MemoryRegionOps riscv_iommu_trap_ops = { + .read_with_attrs = riscv_iommu_trap_read, + .write_with_attrs = riscv_iommu_trap_write, + .endianness = DEVICE_LITTLE_ENDIAN, + .impl = { + .min_access_size = 4, + .max_access_size = 8, + .unaligned = true, + }, + .valid = { + .min_access_size = 4, + .max_access_size = 8, + } +}; + +static void riscv_iommu_realize(DeviceState *dev, Error **errp) +{ + RISCVIOMMUState *s = RISCV_IOMMU(dev); + + s->cap = s->version & RISCV_IOMMU_CAP_VERSION; + if (s->enable_msi) { + s->cap |= RISCV_IOMMU_CAP_MSI_FLAT | RISCV_IOMMU_CAP_MSI_MRIF; + } + if (s->enable_ats) { + s->cap |= RISCV_IOMMU_CAP_ATS; + } + if (s->enable_s_stage) { + s->cap |= RISCV_IOMMU_CAP_SV32 | RISCV_IOMMU_CAP_SV39 | + RISCV_IOMMU_CAP_SV48 | RISCV_IOMMU_CAP_SV57; + } + if (s->enable_g_stage) { + s->cap |= RISCV_IOMMU_CAP_SV32X4 | RISCV_IOMMU_CAP_SV39X4 | + RISCV_IOMMU_CAP_SV48X4 | RISCV_IOMMU_CAP_SV57X4; + } + /* Enable translation debug interface */ + s->cap |= RISCV_IOMMU_CAP_DBG; + + /* Report QEMU target physical address space limits */ + s->cap = set_field(s->cap, RISCV_IOMMU_CAP_PAS, + TARGET_PHYS_ADDR_SPACE_BITS); + + /* TODO: method to report supported PID bits */ + s->pid_bits = 8; /* restricted to size of MemTxAttrs.pid */ + s->cap |= RISCV_IOMMU_CAP_PD8; + + /* Out-of-reset translation mode: OFF (DMA disabled) or BARE (passthrough) */ + s->ddtp = set_field(0, RISCV_IOMMU_DDTP_MODE, s->enable_off ?
+ RISCV_IOMMU_DDTP_MODE_OFF : RISCV_IOMMU_DDTP_MODE_BARE); + + /* register storage */ + s->regs_rw = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); + s->regs_ro = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); + s->regs_wc = g_new0(uint8_t, RISCV_IOMMU_REG_SIZE); + + /* Mark all registers read-only */ + memset(s->regs_ro, 0xff, RISCV_IOMMU_REG_SIZE); + + /* + * Register complete MMIO space, including MSI/PBA registers. + * Note, PCIDevice implementation will add overlapping MR for MSI/PBA, + * managed directly by the PCIDevice implementation. + */ + memory_region_init_io(&s->regs_mr, OBJECT(dev), &riscv_iommu_mmio_ops, s, + "riscv-iommu-regs", RISCV_IOMMU_REG_SIZE); + + /* Set power-on register state */ + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_CAP], s->cap); + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_FCTL], 0); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FCTL], + ~(RISCV_IOMMU_FCTL_BE | RISCV_IOMMU_FCTL_WSI)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_DDTP], + ~(RISCV_IOMMU_DDTP_PPN | RISCV_IOMMU_DDTP_MODE)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQB], + ~(RISCV_IOMMU_CQB_LOG2SZ | RISCV_IOMMU_CQB_PPN)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQB], + ~(RISCV_IOMMU_FQB_LOG2SZ | RISCV_IOMMU_FQB_PPN)); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQB], + ~(RISCV_IOMMU_PQB_LOG2SZ | RISCV_IOMMU_PQB_PPN)); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQMF | + RISCV_IOMMU_CQCSR_CMD_TO | RISCV_IOMMU_CQCSR_CMD_ILL); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_CQCSR], RISCV_IOMMU_CQCSR_CQON | + RISCV_IOMMU_CQCSR_BUSY); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQMF | + RISCV_IOMMU_FQCSR_FQOF); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_FQCSR], RISCV_IOMMU_FQCSR_FQON | + RISCV_IOMMU_FQCSR_BUSY); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQMF | + RISCV_IOMMU_PQCSR_PQOF); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_PQCSR], RISCV_IOMMU_PQCSR_PQON | + RISCV_IOMMU_PQCSR_BUSY); + stl_le_p(&s->regs_wc[RISCV_IOMMU_REG_IPSR], ~0); + stl_le_p(&s->regs_ro[RISCV_IOMMU_REG_ICVEC], 0); + stq_le_p(&s->regs_rw[RISCV_IOMMU_REG_DDTP], s->ddtp); + /* If debug registers enabled. */ + if (s->cap & RISCV_IOMMU_CAP_DBG) { + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_IOVA], 0); + stq_le_p(&s->regs_ro[RISCV_IOMMU_REG_TR_REQ_CTL], + RISCV_IOMMU_TR_REQ_CTL_GO_BUSY); + } + + /* Memory region for downstream access, if specified. */ + if (s->target_mr) { + s->target_as = g_new0(AddressSpace, 1); + address_space_init(s->target_as, s->target_mr, + "riscv-iommu-downstream"); + } else { + /* Fallback to global system memory. 
*/ + s->target_as = &address_space_memory; + } + + /* Memory region for untranslated MRIF/MSI writes */ + memory_region_init_io(&s->trap_mr, OBJECT(dev), &riscv_iommu_trap_ops, s, + "riscv-iommu-trap", ~0ULL); + address_space_init(&s->trap_as, &s->trap_mr, "riscv-iommu-trap-as"); + + /* Device translation context cache */ + s->ctx_cache = g_hash_table_new_full(riscv_iommu_ctx_hash, + riscv_iommu_ctx_equal, + g_free, NULL); + + s->iot_cache = g_hash_table_new_full(riscv_iommu_iot_hash, + riscv_iommu_iot_equal, + g_free, NULL); + + s->iommus.le_next = NULL; + s->iommus.le_prev = NULL; + QLIST_INIT(&s->spaces); +} + +static void riscv_iommu_unrealize(DeviceState *dev) +{ + RISCVIOMMUState *s = RISCV_IOMMU(dev); + + g_hash_table_unref(s->iot_cache); + g_hash_table_unref(s->ctx_cache); +} + +static Property riscv_iommu_properties[] = { + DEFINE_PROP_UINT32("version", RISCVIOMMUState, version, + RISCV_IOMMU_SPEC_DOT_VER), + DEFINE_PROP_UINT32("bus", RISCVIOMMUState, bus, 0x0), + DEFINE_PROP_UINT32("ioatc-limit", RISCVIOMMUState, iot_limit, + LIMIT_CACHE_IOT), + DEFINE_PROP_BOOL("intremap", RISCVIOMMUState, enable_msi, TRUE), + DEFINE_PROP_BOOL("ats", RISCVIOMMUState, enable_ats, TRUE), + DEFINE_PROP_BOOL("off", RISCVIOMMUState, enable_off, TRUE), + DEFINE_PROP_BOOL("s-stage", RISCVIOMMUState, enable_s_stage, TRUE), + DEFINE_PROP_BOOL("g-stage", RISCVIOMMUState, enable_g_stage, TRUE), + DEFINE_PROP_LINK("downstream-mr", RISCVIOMMUState, target_mr, + TYPE_MEMORY_REGION, MemoryRegion *), + DEFINE_PROP_END_OF_LIST(), +}; + +static void riscv_iommu_class_init(ObjectClass *klass, void* data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + /* internal device for riscv-iommu-{pci/sys}, not user-creatable */ + dc->user_creatable = false; + dc->realize = riscv_iommu_realize; + dc->unrealize = riscv_iommu_unrealize; + device_class_set_props(dc, riscv_iommu_properties); +} + +static const TypeInfo riscv_iommu_info = { + .name = TYPE_RISCV_IOMMU, + .parent = TYPE_DEVICE, + .instance_size = sizeof(RISCVIOMMUState), + .class_init = riscv_iommu_class_init, +}; + +static const char *IOMMU_FLAG_STR[] = { + "NA", + "RO", + "WR", + "RW", +}; + +/* RISC-V IOMMU Memory Region - Address Translation Space */ +static IOMMUTLBEntry riscv_iommu_memory_region_translate( + IOMMUMemoryRegion *iommu_mr, hwaddr addr, + IOMMUAccessFlags flag, int iommu_idx) +{ + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); + RISCVIOMMUContext *ctx; + void *ref; + IOMMUTLBEntry iotlb = { + .iova = addr, + .target_as = as->iommu->target_as, + .addr_mask = ~0ULL, + .perm = flag, + }; + + ctx = riscv_iommu_ctx(as->iommu, as->devid, iommu_idx, &ref); + if (ctx == NULL) { + /* Translation disabled or invalid. */ + iotlb.addr_mask = 0; + iotlb.perm = IOMMU_NONE; + } else if (riscv_iommu_translate(as->iommu, ctx, &iotlb, true)) { + /* Translation disabled or fault reported. */ + iotlb.addr_mask = 0; + iotlb.perm = IOMMU_NONE; + } + + /* Trace all dma translations with original access flags. 
*/ + trace_riscv_iommu_dma(as->iommu->parent_obj.id, PCI_BUS_NUM(as->devid), + PCI_SLOT(as->devid), PCI_FUNC(as->devid), iommu_idx, + IOMMU_FLAG_STR[flag & IOMMU_RW], iotlb.iova, + iotlb.translated_addr); + + riscv_iommu_ctx_put(as->iommu, ref); + + return iotlb; +} + +static int riscv_iommu_memory_region_notify( + IOMMUMemoryRegion *iommu_mr, IOMMUNotifierFlag old, + IOMMUNotifierFlag new, Error **errp) +{ + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); + + if (old == IOMMU_NOTIFIER_NONE) { + as->notifier = true; + trace_riscv_iommu_notifier_add(iommu_mr->parent_obj.name); + } else if (new == IOMMU_NOTIFIER_NONE) { + as->notifier = false; + trace_riscv_iommu_notifier_del(iommu_mr->parent_obj.name); + } + + return 0; +} + +static inline bool pci_is_iommu(PCIDevice *pdev) +{ + return pci_get_word(pdev->config + PCI_CLASS_DEVICE) == 0x0806; +} + +static AddressSpace *riscv_iommu_find_as(PCIBus *bus, void *opaque, int devfn) +{ + RISCVIOMMUState *s = (RISCVIOMMUState *) opaque; + PCIDevice *pdev = pci_find_device(bus, pci_bus_num(bus), devfn); + AddressSpace *as = NULL; + + if (pdev && pci_is_iommu(pdev)) { + return s->target_as; + } + + /* Find first registered IOMMU device */ + while (s->iommus.le_prev) { + s = *(s->iommus.le_prev); + } + + /* Find first matching IOMMU */ + while (s != NULL && as == NULL) { + as = riscv_iommu_space(s, PCI_BUILD_BDF(pci_bus_num(bus), devfn)); + s = s->iommus.le_next; + } + + return as ? as : &address_space_memory; +} + +static const PCIIOMMUOps riscv_iommu_ops = { + .get_address_space = riscv_iommu_find_as, +}; + +void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, + Error **errp) +{ + if (bus->iommu_ops && + bus->iommu_ops->get_address_space == riscv_iommu_find_as) { + /* Allow multiple IOMMUs on the same PCIe bus, link known devices */ + RISCVIOMMUState *last = (RISCVIOMMUState *)bus->iommu_opaque; + QLIST_INSERT_AFTER(last, iommu, iommus); + } else if (!bus->iommu_ops && !bus->iommu_opaque) { + pci_setup_iommu(bus, &riscv_iommu_ops, iommu); + } else { + error_setg(errp, "can't register secondary IOMMU for PCI bus #%d", + pci_bus_num(bus)); + } +} + +static int riscv_iommu_memory_region_index(IOMMUMemoryRegion *iommu_mr, + MemTxAttrs attrs) +{ + return attrs.unspecified ? 
RISCV_IOMMU_NOPROCID : (int)attrs.pid; +} + +static int riscv_iommu_memory_region_index_len(IOMMUMemoryRegion *iommu_mr) +{ + RISCVIOMMUSpace *as = container_of(iommu_mr, RISCVIOMMUSpace, iova_mr); + return 1 << as->iommu->pid_bits; +} + +static void riscv_iommu_memory_region_init(ObjectClass *klass, void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = riscv_iommu_memory_region_translate; + imrc->notify_flag_changed = riscv_iommu_memory_region_notify; + imrc->attrs_to_index = riscv_iommu_memory_region_index; + imrc->num_indexes = riscv_iommu_memory_region_index_len; +} + +static const TypeInfo riscv_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_RISCV_IOMMU_MEMORY_REGION, + .class_init = riscv_iommu_memory_region_init, +}; + +static void riscv_iommu_register_mr_types(void) +{ + type_register_static(&riscv_iommu_memory_region_info); + type_register_static(&riscv_iommu_info); +} + +type_init(riscv_iommu_register_mr_types); diff --git a/hw/riscv/riscv-iommu.h b/hw/riscv/riscv-iommu.h new file mode 100644 index 0000000000..da3f03440c --- /dev/null +++ b/hw/riscv/riscv-iommu.h @@ -0,0 +1,130 @@ +/* + * QEMU emulation of a RISC-V IOMMU + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see .
+ */ + +#ifndef HW_RISCV_IOMMU_STATE_H +#define HW_RISCV_IOMMU_STATE_H + +#include "qom/object.h" +#include "hw/riscv/iommu.h" + +struct RISCVIOMMUState { + /*< private >*/ + DeviceState parent_obj; + + /*< public >*/ + uint32_t version; /* Reported interface version number */ + uint32_t pid_bits; /* process identifier width */ + uint32_t bus; /* PCI bus mapping for non-root endpoints */ + + uint64_t cap; /* IOMMU supported capabilities */ + uint64_t fctl; /* IOMMU enabled features */ + uint64_t icvec_avail_vectors; /* Available interrupt vectors in ICVEC */ + + bool enable_off; /* Enable out-of-reset OFF mode (DMA disabled) */ + bool enable_msi; /* Enable MSI remapping */ + bool enable_ats; /* Enable ATS support */ + bool enable_s_stage; /* Enable S/VS-Stage translation */ + bool enable_g_stage; /* Enable G-Stage translation */ + + /* IOMMU Internal State */ + uint64_t ddtp; /* Validated Device Directory Tree Root Pointer */ + + dma_addr_t cq_addr; /* Command queue base physical address */ + dma_addr_t fq_addr; /* Fault/event queue base physical address */ + dma_addr_t pq_addr; /* Page request queue base physical address */ + + uint32_t cq_mask; /* Command queue index bit mask */ + uint32_t fq_mask; /* Fault/event queue index bit mask */ + uint32_t pq_mask; /* Page request queue index bit mask */ + + /* interrupt notifier */ + void (*notify)(RISCVIOMMUState *iommu, unsigned vector); + + /* IOMMU State Machine */ + QemuThread core_proc; /* Background processing thread */ + QemuCond core_cond; /* Background processing wake up signal */ + unsigned core_exec; /* Processing thread execution actions */ + + /* IOMMU target address space */ + AddressSpace *target_as; + MemoryRegion *target_mr; + + /* MSI / MRIF access trap */ + AddressSpace trap_as; + MemoryRegion trap_mr; + + GHashTable *ctx_cache; /* Device translation Context Cache */ + + GHashTable *iot_cache; /* IO Translated Address Cache */ + unsigned iot_limit; /* IO Translation Cache size limit */ + + /* MMIO Hardware Interface */ + MemoryRegion regs_mr; + uint8_t *regs_rw; /* register state (user write) */ + uint8_t *regs_wc; /* write-1-to-clear mask */ + uint8_t *regs_ro; /* read-only mask */ + + QLIST_ENTRY(RISCVIOMMUState) iommus; + QLIST_HEAD(, RISCVIOMMUSpace) spaces; +}; + +void riscv_iommu_pci_setup_iommu(RISCVIOMMUState *iommu, PCIBus *bus, + Error **errp); + +/* private helpers */ + +/* Register helper functions */ +static inline uint32_t riscv_iommu_reg_mod32(RISCVIOMMUState *s, + unsigned idx, uint32_t set, uint32_t clr) +{ + uint32_t val = ldl_le_p(s->regs_rw + idx); + stl_le_p(s->regs_rw + idx, (val & ~clr) | set); + return val; +} + +static inline void riscv_iommu_reg_set32(RISCVIOMMUState *s, unsigned idx, + uint32_t set) +{ + stl_le_p(s->regs_rw + idx, set); +} + +static inline uint32_t riscv_iommu_reg_get32(RISCVIOMMUState *s, unsigned idx) +{ + return ldl_le_p(s->regs_rw + idx); +} + +static inline uint64_t riscv_iommu_reg_mod64(RISCVIOMMUState *s, unsigned idx, + uint64_t set, uint64_t clr) +{ + uint64_t val = ldq_le_p(s->regs_rw + idx); + stq_le_p(s->regs_rw + idx, (val & ~clr) | set); + return val; +} + +static inline void riscv_iommu_reg_set64(RISCVIOMMUState *s, unsigned idx, + uint64_t set) +{ + stq_le_p(s->regs_rw + idx, set); +} + +static inline uint64_t riscv_iommu_reg_get64(RISCVIOMMUState *s, + unsigned idx) +{ + return ldq_le_p(s->regs_rw + idx); +} +#endif diff --git a/hw/riscv/sifive_u.c b/hw/riscv/sifive_u.c index 9b3dcf3a7a..c5e74126b1 100644 --- a/hw/riscv/sifive_u.c +++ b/hw/riscv/sifive_u.c @@ -645,7 
+645,8 @@ static void sifive_u_machine_init(MachineState *machine) rom_add_blob_fixed_as("mrom.reset", reset_vec, sizeof(reset_vec), memmap[SIFIVE_U_DEV_MROM].base, &address_space_memory); - riscv_rom_copy_firmware_info(machine, memmap[SIFIVE_U_DEV_MROM].base, + riscv_rom_copy_firmware_info(machine, &s->soc.u_cpus, + memmap[SIFIVE_U_DEV_MROM].base, memmap[SIFIVE_U_DEV_MROM].size, sizeof(reset_vec), kernel_entry); diff --git a/hw/riscv/trace-events b/hw/riscv/trace-events new file mode 100644 index 0000000000..0527c56c91 --- /dev/null +++ b/hw/riscv/trace-events @@ -0,0 +1,17 @@ +# See documentation at docs/devel/tracing.rst + +# riscv-iommu.c +riscv_iommu_new(const char *id, unsigned b, unsigned d, unsigned f) "%s: device attached %04x:%02x.%d" +riscv_iommu_flt(const char *id, unsigned b, unsigned d, unsigned f, uint64_t reason, uint64_t iova) "%s: fault %04x:%02x.%u reason: 0x%"PRIx64" iova: 0x%"PRIx64 +riscv_iommu_pri(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: page request %04x:%02x.%u iova: 0x%"PRIx64 +riscv_iommu_dma(const char *id, unsigned b, unsigned d, unsigned f, unsigned pasid, const char *dir, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u #%u %s 0x%"PRIx64" -> 0x%"PRIx64 +riscv_iommu_msi(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova, uint64_t phys) "%s: translate %04x:%02x.%u MSI 0x%"PRIx64" -> 0x%"PRIx64 +riscv_iommu_mrif_notification(const char *id, uint32_t nid, uint64_t phys) "%s: sent MRIF notification 0x%x to 0x%"PRIx64 +riscv_iommu_cmd(const char *id, uint64_t l, uint64_t u) "%s: command 0x%"PRIx64" 0x%"PRIx64 +riscv_iommu_notifier_add(const char *id) "%s: dev-iotlb notifier added" +riscv_iommu_notifier_del(const char *id) "%s: dev-iotlb notifier removed" +riscv_iommu_notify_int_vector(uint32_t cause, uint32_t vector) "Interrupt cause 0x%x sent via vector 0x%x" +riscv_iommu_icvec_write(uint32_t orig, uint32_t actual) "ICVEC write: incoming 0x%x actual 0x%x" +riscv_iommu_ats(const char *id, unsigned b, unsigned d, unsigned f, uint64_t iova) "%s: translate request %04x:%02x.%u iova: 0x%"PRIx64 +riscv_iommu_ats_inval(const char *id) "%s: dev-iotlb invalidate" +riscv_iommu_ats_prgr(const char *id) "%s: dev-iotlb page request group response" diff --git a/hw/riscv/trace.h b/hw/riscv/trace.h new file mode 100644 index 0000000000..8c0e3ca1f3 --- /dev/null +++ b/hw/riscv/trace.h @@ -0,0 +1 @@ +#include "trace/trace-hw_riscv.h" diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index ee3129f3b3..45a8c4f819 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -32,6 +32,7 @@ #include "hw/core/sysbus-fdt.h" #include "target/riscv/pmu.h" #include "hw/riscv/riscv_hart.h" +#include "hw/riscv/iommu.h" #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" @@ -1032,6 +1033,30 @@ static void create_fdt_virtio_iommu(RISCVVirtState *s, uint16_t bdf) bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf); } +static void create_fdt_iommu(RISCVVirtState *s, uint16_t bdf) +{ + const char comp[] = "riscv,pci-iommu"; + void *fdt = MACHINE(s)->fdt; + uint32_t iommu_phandle; + g_autofree char *iommu_node = NULL; + g_autofree char *pci_node = NULL; + + pci_node = g_strdup_printf("/soc/pci@%lx", + (long) virt_memmap[VIRT_PCIE_ECAM].base); + iommu_node = g_strdup_printf("%s/iommu@%x", pci_node, bdf); + iommu_phandle = qemu_fdt_alloc_phandle(fdt); + qemu_fdt_add_subnode(fdt, iommu_node); + + qemu_fdt_setprop(fdt, iommu_node, "compatible", comp, sizeof(comp)); + qemu_fdt_setprop_cell(fdt, iommu_node, "#iommu-cells", 1); + 
qemu_fdt_setprop_cell(fdt, iommu_node, "phandle", iommu_phandle); + qemu_fdt_setprop_cells(fdt, iommu_node, "reg", + bdf << 8, 0, 0, 0, 0); + qemu_fdt_setprop_cells(fdt, pci_node, "iommu-map", + 0, iommu_phandle, 0, bdf, + bdf + 1, iommu_phandle, bdf + 1, 0xffff - bdf); +} + static void finalize_fdt(RISCVVirtState *s) { uint32_t phandle = 1, irq_mmio_phandle = 1, msi_pcie_phandle = 1; @@ -1738,9 +1763,11 @@ static HotplugHandler *virt_machine_get_hotplug_handler(MachineState *machine, MachineClass *mc = MACHINE_GET_CLASS(machine); if (device_is_dynamic_sysbus(mc, dev) || - object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { + object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) || + object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) { return HOTPLUG_HANDLER(machine); } + return NULL; } @@ -1761,6 +1788,10 @@ static void virt_machine_device_plug_cb(HotplugHandler *hotplug_dev, if (object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI)) { create_fdt_virtio_iommu(s, pci_get_bdf(PCI_DEVICE(dev))); } + + if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_IOMMU_PCI)) { + create_fdt_iommu(s, pci_get_bdf(PCI_DEVICE(dev))); + } } static void virt_machine_class_init(ObjectClass *oc, void *data) diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h index 14cdd8d582..e27c18f3dc 100644 --- a/include/exec/memattrs.h +++ b/include/exec/memattrs.h @@ -52,6 +52,11 @@ typedef struct MemTxAttrs { unsigned int memory:1; /* Requester ID (for MSI for example) */ unsigned int requester_id:16; + + /* + * PID (PCI PASID) support: Limited to 8 bits process identifier. + */ + unsigned int pid:8; } MemTxAttrs; /* Bus masters which don't specify any attributes will get this, diff --git a/include/hw/char/sifive_uart.h b/include/hw/char/sifive_uart.h index 7f6c79f8bd..0846cf6218 100644 --- a/include/hw/char/sifive_uart.h +++ b/include/hw/char/sifive_uart.h @@ -24,6 +24,7 @@ #include "hw/qdev-properties.h" #include "hw/sysbus.h" #include "qom/object.h" +#include "qemu/fifo8.h" enum { SIFIVE_UART_TXFIFO = 0, @@ -48,9 +49,13 @@ enum { SIFIVE_UART_IP_RXWM = 2 /* Receive watermark interrupt pending */ }; +#define SIFIVE_UART_TXFIFO_FULL 0x80000000 + #define SIFIVE_UART_GET_TXCNT(txctrl) ((txctrl >> 16) & 0x7) #define SIFIVE_UART_GET_RXCNT(rxctrl) ((rxctrl >> 16) & 0x7) + #define SIFIVE_UART_RX_FIFO_SIZE 8 +#define SIFIVE_UART_TX_FIFO_SIZE 8 #define TYPE_SIFIVE_UART "riscv.sifive.uart" OBJECT_DECLARE_SIMPLE_TYPE(SiFiveUARTState, SIFIVE_UART) @@ -63,13 +68,20 @@ struct SiFiveUARTState { qemu_irq irq; MemoryRegion mmio; CharBackend chr; - uint8_t rx_fifo[SIFIVE_UART_RX_FIFO_SIZE]; - uint8_t rx_fifo_len; + + uint32_t txfifo; uint32_t ie; uint32_t ip; uint32_t txctrl; uint32_t rxctrl; uint32_t div; + + uint8_t rx_fifo[SIFIVE_UART_RX_FIFO_SIZE]; + uint8_t rx_fifo_len; + + Fifo8 tx_fifo; + + QEMUTimer *fifo_trigger_handle; }; SiFiveUARTState *sifive_uart_create(MemoryRegion *address_space, hwaddr base, diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index eb26cac810..35d4fe0bbf 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -116,6 +116,7 @@ extern bool pci_available; #define PCI_DEVICE_ID_REDHAT_PVPANIC 0x0011 #define PCI_DEVICE_ID_REDHAT_ACPI_ERST 0x0012 #define PCI_DEVICE_ID_REDHAT_UFS 0x0013 +#define PCI_DEVICE_ID_REDHAT_RISCV_IOMMU 0x0014 #define PCI_DEVICE_ID_REDHAT_QXL 0x0100 #define FMT_PCIBUS PRIx64 diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h index 18bfe9f7bf..f778b560de 100644 --- a/include/hw/riscv/boot.h +++ b/include/hw/riscv/boot.h @@ -56,7 +56,9 @@ 
void riscv_setup_rom_reset_vec(MachineState *machine, RISCVHartArrayState *harts hwaddr rom_base, hwaddr rom_size, uint64_t kernel_entry, uint64_t fdt_load_addr); -void riscv_rom_copy_firmware_info(MachineState *machine, hwaddr rom_base, +void riscv_rom_copy_firmware_info(MachineState *machine, + RISCVHartArrayState *harts, + hwaddr rom_base, hwaddr rom_size, uint32_t reset_vec_size, uint64_t kernel_entry); diff --git a/include/hw/riscv/boot_opensbi.h b/include/hw/riscv/boot_opensbi.h index 1b749663dc..18664a174b 100644 --- a/include/hw/riscv/boot_opensbi.h +++ b/include/hw/riscv/boot_opensbi.h @@ -58,4 +58,33 @@ struct fw_dynamic_info { target_long boot_hart; }; +/** Representation dynamic info passed by previous booting stage */ +struct fw_dynamic_info32 { + /** Info magic */ + int32_t magic; + /** Info version */ + int32_t version; + /** Next booting stage address */ + int32_t next_addr; + /** Next booting stage mode */ + int32_t next_mode; + /** Options for OpenSBI library */ + int32_t options; + /** + * Preferred boot HART id + * + * It is possible that the previous booting stage uses same link + * address as the FW_DYNAMIC firmware. In this case, the relocation + * lottery mechanism can potentially overwrite the previous booting + * stage while other HARTs are still running in the previous booting + * stage leading to boot-time crash. To avoid this boot-time crash, + * the previous booting stage can specify last HART that will jump + * to the FW_DYNAMIC firmware as the preferred boot HART. + * + * To avoid specifying a preferred boot HART, the previous booting + * stage can set it to -1UL which will force the FW_DYNAMIC firmware + * to use the relocation lottery mechanism. + */ + int32_t boot_hart; +}; #endif diff --git a/include/hw/riscv/iommu.h b/include/hw/riscv/iommu.h new file mode 100644 index 0000000000..80769a1400 --- /dev/null +++ b/include/hw/riscv/iommu.h @@ -0,0 +1,36 @@ +/* + * QEMU emulation of an RISC-V IOMMU + * + * Copyright (C) 2022-2023 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . 
+ */ + +#ifndef HW_RISCV_IOMMU_H +#define HW_RISCV_IOMMU_H + +#include "qemu/osdep.h" +#include "qom/object.h" + +#define TYPE_RISCV_IOMMU "riscv-iommu" +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUState, RISCV_IOMMU) +typedef struct RISCVIOMMUState RISCVIOMMUState; + +#define TYPE_RISCV_IOMMU_MEMORY_REGION "riscv-iommu-mr" +typedef struct RISCVIOMMUSpace RISCVIOMMUSpace; + +#define TYPE_RISCV_IOMMU_PCI "riscv-iommu-pci" +OBJECT_DECLARE_SIMPLE_TYPE(RISCVIOMMUStatePci, RISCV_IOMMU_PCI) +typedef struct RISCVIOMMUStatePci RISCVIOMMUStatePci; + +#endif diff --git a/meson.build b/meson.build index 92168b915a..2c9086a3fe 100644 --- a/meson.build +++ b/meson.build @@ -3472,6 +3472,7 @@ if have_system 'hw/pci-host', 'hw/ppc', 'hw/rtc', + 'hw/riscv', 'hw/s390x', 'hw/scsi', 'hw/sd', diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h index 4464c0fd7a..62115375cd 100644 --- a/target/riscv/cpu-qom.h +++ b/target/riscv/cpu-qom.h @@ -30,6 +30,7 @@ #define RISCV_CPU_TYPE_NAME(name) (name RISCV_CPU_TYPE_SUFFIX) #define TYPE_RISCV_CPU_MAX RISCV_CPU_TYPE_NAME("max") +#define TYPE_RISCV_CPU_MAX32 RISCV_CPU_TYPE_NAME("max32") #define TYPE_RISCV_CPU_BASE32 RISCV_CPU_TYPE_NAME("rv32") #define TYPE_RISCV_CPU_BASE64 RISCV_CPU_TYPE_NAME("rv64") #define TYPE_RISCV_CPU_BASE128 RISCV_CPU_TYPE_NAME("x-rv128") diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 658bdb4ae1..f219f0c3b5 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -106,6 +106,8 @@ const RISCVIsaExtData isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(ziccif, PRIV_VERSION_1_11_0, has_priv_1_11), ISA_EXT_DATA_ENTRY(zicclsm, PRIV_VERSION_1_11_0, has_priv_1_11), ISA_EXT_DATA_ENTRY(ziccrse, PRIV_VERSION_1_11_0, has_priv_1_11), + ISA_EXT_DATA_ENTRY(zicfilp, PRIV_VERSION_1_12_0, ext_zicfilp), + ISA_EXT_DATA_ENTRY(zicfiss, PRIV_VERSION_1_13_0, ext_zicfiss), ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond), ISA_EXT_DATA_ENTRY(zicntr, PRIV_VERSION_1_12_0, ext_zicntr), ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_zicsr), @@ -449,11 +451,9 @@ static void riscv_max_cpu_init(Object *obj) env->priv_ver = PRIV_VERSION_LATEST; #ifndef CONFIG_USER_ONLY -#ifdef TARGET_RISCV32 - set_satp_mode_max_supported(cpu, VM_1_10_SV32); -#else - set_satp_mode_max_supported(cpu, VM_1_10_SV57); -#endif + set_satp_mode_max_supported(RISCV_CPU(obj), + riscv_cpu_mxl(&RISCV_CPU(obj)->env) == MXL_RV32 ? 
+ VM_1_10_SV32 : VM_1_10_SV57); #endif } @@ -615,7 +615,10 @@ static void rv64e_bare_cpu_init(Object *obj) riscv_cpu_set_misa_ext(env, RVE); } -#else /* !TARGET_RISCV64 */ +#endif /* !TARGET_RISCV64 */ + +#if defined(TARGET_RISCV32) || \ + (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) static void rv32_base_cpu_init(Object *obj) { @@ -1003,12 +1006,23 @@ static void riscv_cpu_reset_hold(Object *obj, ResetType type) } pmp_unlock_entries(env); +#else + env->priv = PRV_U; + env->senvcfg = 0; + env->menvcfg = 0; #endif + + /* on reset elp is clear */ + env->elp = false; + /* on reset ssp is set to 0 */ + env->ssp = 0; + env->xl = riscv_cpu_mxl(env); riscv_cpu_update_mask(env); cs->exception_index = RISCV_EXCP_NONE; env->load_res = -1; set_default_nan_mode(1, &env->fp_status); + env->vill = true; #ifndef CONFIG_USER_ONLY if (cpu->cfg.debug) { @@ -1460,6 +1474,8 @@ const RISCVCPUMultiExtConfig riscv_cpu_extensions[] = { MULTI_EXT_CFG_BOOL("sscofpmf", ext_sscofpmf, false), MULTI_EXT_CFG_BOOL("smcntrpmf", ext_smcntrpmf, false), MULTI_EXT_CFG_BOOL("zifencei", ext_zifencei, true), + MULTI_EXT_CFG_BOOL("zicfilp", ext_zicfilp, false), + MULTI_EXT_CFG_BOOL("zicfiss", ext_zicfiss, false), MULTI_EXT_CFG_BOOL("zicsr", ext_zicsr, true), MULTI_EXT_CFG_BOOL("zihintntl", ext_zihintntl, true), MULTI_EXT_CFG_BOOL("zihintpause", ext_zihintpause, true), @@ -2941,6 +2957,12 @@ static const TypeInfo riscv_cpu_type_infos[] = { }, #if defined(TARGET_RISCV32) DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV32, riscv_max_cpu_init), +#elif defined(TARGET_RISCV64) + DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV64, riscv_max_cpu_init), +#endif + +#if defined(TARGET_RISCV32) || \ + (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE32, MXL_RV32, rv32_base_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_IBEX, MXL_RV32, rv32_ibex_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E31, MXL_RV32, rv32_sifive_e_cpu_init), @@ -2948,8 +2970,13 @@ static const TypeInfo riscv_cpu_type_infos[] = { DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U34, MXL_RV32, rv32_sifive_u_cpu_init), DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV32I, MXL_RV32, rv32i_bare_cpu_init), DEFINE_BARE_CPU(TYPE_RISCV_CPU_RV32E, MXL_RV32, rv32e_bare_cpu_init), -#elif defined(TARGET_RISCV64) - DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX, MXL_RV64, riscv_max_cpu_init), +#endif + +#if (defined(TARGET_RISCV64) && !defined(CONFIG_USER_ONLY)) + DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_MAX32, MXL_RV32, riscv_max_cpu_init), +#endif + +#if defined(TARGET_RISCV64) DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE64, MXL_RV64, rv64_base_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_E51, MXL_RV64, rv64_sifive_e_cpu_init), DEFINE_VENDOR_CPU(TYPE_RISCV_CPU_SIFIVE_U54, MXL_RV64, rv64_sifive_u_cpu_init), diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h index 1619c3acb6..284b112821 100644 --- a/target/riscv/cpu.h +++ b/target/riscv/cpu.h @@ -46,8 +46,13 @@ typedef struct CPUArchState CPURISCVState; /* * RISC-V-specific extra insn start words: * 1: Original instruction opcode + * 2: more information about instruction */ -#define TARGET_INSN_START_EXTRA_WORDS 1 +#define TARGET_INSN_START_EXTRA_WORDS 2 +/* + * b0: Whether a instruction always raise a store AMO or not. 
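A minimal standalone sketch, not part of the patch, of what the new insn-start word is for: the translator records RISCV_UW2_ALWAYS_STORE_AMO for instructions that must always fault as store/AMO, and the trap path later promotes a recorded load fault accordingly (the actual promotion is implemented further down in cpu_helper.c). All names below are local to the example.

#include <stdint.h>
#include <stdio.h>

#define UW2_ALWAYS_STORE_AMO  1u          /* bit 0 of the extra unwind word */

enum fault { FAULT_LOAD_PAGE = 13, FAULT_STORE_AMO_PAGE = 15 };

/* Promote a load fault when the faulting insn was flagged at translate time. */
static enum fault promote_cause(enum fault cause, uint32_t uw2)
{
    if ((uw2 & UW2_ALWAYS_STORE_AMO) && cause == FAULT_LOAD_PAGE) {
        return FAULT_STORE_AMO_PAGE;
    }
    return cause;
}

int main(void)
{
    printf("%d\n", promote_cause(FAULT_LOAD_PAGE, UW2_ALWAYS_STORE_AMO)); /* 15 */
    printf("%d\n", promote_cause(FAULT_LOAD_PAGE, 0));                    /* 13 */
    return 0;
}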
+ */ +#define RISCV_UW2_ALWAYS_STORE_AMO 1 #define RV(x) ((target_ulong)1 << (x - 'A')) @@ -230,12 +235,24 @@ struct CPUArchState { target_ulong jvt; + /* elp state for zicfilp extension */ + bool elp; + /* shadow stack register for zicfiss extension */ + target_ulong ssp; + /* env place holder for extra word 2 during unwind */ + target_ulong excp_uw2; + /* sw check code for sw check exception */ + target_ulong sw_check_code; #ifdef CONFIG_USER_ONLY uint32_t elf_flags; #endif -#ifndef CONFIG_USER_ONLY target_ulong priv; + /* CSRs for execution environment configuration */ + uint64_t menvcfg; + target_ulong senvcfg; + +#ifndef CONFIG_USER_ONLY /* This contains QEMU specific information about the virt state. */ bool virt_enabled; target_ulong geilen; @@ -445,12 +462,9 @@ struct CPUArchState { target_ulong upmmask; target_ulong upmbase; - /* CSRs for execution environment configuration */ - uint64_t menvcfg; uint64_t mstateen[SMSTATEEN_MAX_COUNT]; uint64_t hstateen[SMSTATEEN_MAX_COUNT]; uint64_t sstateen[SMSTATEEN_MAX_COUNT]; - target_ulong senvcfg; uint64_t henvcfg; #endif target_ulong cur_pmmask; @@ -544,6 +558,8 @@ void riscv_cpu_set_geilen(CPURISCVState *env, target_ulong geilen); bool riscv_cpu_vector_enabled(CPURISCVState *env); void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool enable); int riscv_env_mmu_index(CPURISCVState *env, bool ifetch); +bool cpu_get_fcfien(CPURISCVState *env); +bool cpu_get_bcfien(CPURISCVState *env); G_NORETURN void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr, MMUAccessType access_type, int mmu_idx, uintptr_t retaddr); @@ -616,6 +632,11 @@ FIELD(TB_FLAGS, ITRIGGER, 22, 1) FIELD(TB_FLAGS, VIRT_ENABLED, 23, 1) FIELD(TB_FLAGS, PRIV, 24, 2) FIELD(TB_FLAGS, AXL, 26, 2) +/* zicfilp needs a TB flag to track indirect branches */ +FIELD(TB_FLAGS, FCFI_ENABLED, 28, 1) +FIELD(TB_FLAGS, FCFI_LP_EXPECTED, 29, 1) +/* zicfiss needs a TB flag so that correct TB is located based on tb flags */ +FIELD(TB_FLAGS, BCFI_ENABLED, 30, 1) #ifdef TARGET_RISCV32 #define riscv_cpu_mxl(env) ((void)(env), MXL_RV32) @@ -709,8 +730,11 @@ static inline RISCVMXL riscv_cpu_sxl(CPURISCVState *env) #ifdef CONFIG_USER_ONLY return env->misa_mxl; #else - return get_field(env->mstatus, MSTATUS64_SXL); + if (env->misa_mxl != MXL_RV32) { + return get_field(env->mstatus, MSTATUS64_SXL); + } #endif + return MXL_RV32; } #endif diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 7e3f629356..385a2c67c2 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -34,6 +34,9 @@ /* Control and Status Registers */ +/* zicfiss user ssp csr */ +#define CSR_SSP 0x011 + /* User Trap Setup */ #define CSR_USTATUS 0x000 #define CSR_UIE 0x004 @@ -552,6 +555,8 @@ #define MSTATUS_TVM 0x00100000 /* since: priv-1.10 */ #define MSTATUS_TW 0x00200000 /* since: priv-1.10 */ #define MSTATUS_TSR 0x00400000 /* since: priv-1.10 */ +#define MSTATUS_SPELP 0x00800000 /* zicfilp */ +#define MSTATUS_MPELP 0x020000000000 /* zicfilp */ #define MSTATUS_GVA 0x4000000000ULL #define MSTATUS_MPV 0x8000000000ULL @@ -582,6 +587,7 @@ typedef enum { #define SSTATUS_XS 0x00018000 #define SSTATUS_SUM 0x00040000 /* since: priv-1.10 */ #define SSTATUS_MXR 0x00080000 +#define SSTATUS_SPELP MSTATUS_SPELP /* zicfilp */ #define SSTATUS64_UXL 0x0000000300000000ULL @@ -689,6 +695,11 @@ typedef enum RISCVException { RISCV_EXCP_SEMIHOST = 0x3f, } RISCVException; +/* zicfilp defines lp violation results in sw check with tval = 2*/ +#define RISCV_EXCP_SW_CHECK_FCFI_TVAL 2 +/* zicfiss defines ss violation results in 
sw check with tval = 3*/ +#define RISCV_EXCP_SW_CHECK_BCFI_TVAL 3 + #define RISCV_EXCP_INT_FLAG 0x80000000 #define RISCV_EXCP_INT_MASK 0x7fffffff @@ -754,6 +765,8 @@ typedef enum RISCVException { /* Execution environment configuration bits */ #define MENVCFG_FIOM BIT(0) +#define MENVCFG_LPE BIT(2) /* zicfilp */ +#define MENVCFG_SSE BIT(3) /* zicfiss */ #define MENVCFG_CBIE (3UL << 4) #define MENVCFG_CBCFE BIT(6) #define MENVCFG_CBZE BIT(7) @@ -767,11 +780,15 @@ typedef enum RISCVException { #define MENVCFGH_STCE BIT(31) #define SENVCFG_FIOM MENVCFG_FIOM +#define SENVCFG_LPE MENVCFG_LPE +#define SENVCFG_SSE MENVCFG_SSE #define SENVCFG_CBIE MENVCFG_CBIE #define SENVCFG_CBCFE MENVCFG_CBCFE #define SENVCFG_CBZE MENVCFG_CBZE #define HENVCFG_FIOM MENVCFG_FIOM +#define HENVCFG_LPE MENVCFG_LPE +#define HENVCFG_SSE MENVCFG_SSE #define HENVCFG_CBIE MENVCFG_CBIE #define HENVCFG_CBCFE MENVCFG_CBCFE #define HENVCFG_CBZE MENVCFG_CBZE diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index 355afedfd3..59d6fc445d 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -67,6 +67,8 @@ struct RISCVCPUConfig { bool ext_zicbom; bool ext_zicbop; bool ext_zicboz; + bool ext_zicfilp; + bool ext_zicfiss; bool ext_zicond; bool ext_zihintntl; bool ext_zihintpause; diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index a935377b4a..0a3ead69ea 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -33,6 +33,7 @@ #include "cpu_bits.h" #include "debug.h" #include "tcg/oversized-guest.h" +#include "pmp.h" int riscv_env_mmu_index(CPURISCVState *env, bool ifetch) { @@ -63,6 +64,62 @@ int riscv_env_mmu_index(CPURISCVState *env, bool ifetch) #endif } +bool cpu_get_fcfien(CPURISCVState *env) +{ + /* no cfi extension, return false */ + if (!env_archcpu(env)->cfg.ext_zicfilp) { + return false; + } + + switch (env->priv) { + case PRV_U: + if (riscv_has_ext(env, RVS)) { + return env->senvcfg & SENVCFG_LPE; + } + return env->menvcfg & MENVCFG_LPE; +#ifndef CONFIG_USER_ONLY + case PRV_S: + if (env->virt_enabled) { + return env->henvcfg & HENVCFG_LPE; + } + return env->menvcfg & MENVCFG_LPE; + case PRV_M: + return env->mseccfg & MSECCFG_MLPE; +#endif + default: + g_assert_not_reached(); + } +} + +bool cpu_get_bcfien(CPURISCVState *env) +{ + /* no cfi extension, return false */ + if (!env_archcpu(env)->cfg.ext_zicfiss) { + return false; + } + + switch (env->priv) { + case PRV_U: + /* + * If S is not implemented then shadow stack for U can't be turned on + * It is checked in `riscv_cpu_validate_set_extensions`, so no need to + * check here or assert here + */ + return env->senvcfg & SENVCFG_SSE; +#ifndef CONFIG_USER_ONLY + case PRV_S: + if (env->virt_enabled) { + return env->henvcfg & HENVCFG_SSE; + } + return env->menvcfg & MENVCFG_SSE; + case PRV_M: /* M-mode shadow stack is always off */ + return false; +#endif + default: + g_assert_not_reached(); + } +} + void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, uint64_t *cs_base, uint32_t *pflags) { @@ -104,6 +161,20 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, vaddr *pc, flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1); } + if (cpu_get_fcfien(env)) { + /* + * For Forward CFI, only the expectation of a lpad at + * the start of the block is tracked via env->elp. env->elp + * is turned on during jalr translation. 
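A standalone restatement of the lookup that cpu_get_fcfien()/cpu_get_bcfien() perform above: U-mode is governed by senvcfg, (V)S-mode by henvcfg or menvcfg, and the shadow stack is never enabled in M-mode. Names and values below are local to the sketch, not QEMU state.

#include <stdbool.h>
#include <stdint.h>

#define ENVCFG_SSE (1u << 3)

enum prv { PRV_U_SK = 0, PRV_S_SK = 1, PRV_M_SK = 3 };

static bool bcfi_enabled(enum prv priv, bool virt, uint32_t senvcfg,
                         uint32_t henvcfg, uint32_t menvcfg)
{
    switch (priv) {
    case PRV_U_SK:
        return senvcfg & ENVCFG_SSE;
    case PRV_S_SK:
        return (virt ? henvcfg : menvcfg) & ENVCFG_SSE;
    case PRV_M_SK:
    default:
        return false;   /* M-mode shadow stack is always off */
    }
}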
+ */ + flags = FIELD_DP32(flags, TB_FLAGS, FCFI_LP_EXPECTED, env->elp); + flags = FIELD_DP32(flags, TB_FLAGS, FCFI_ENABLED, 1); + } + + if (cpu_get_bcfien(env)) { + flags = FIELD_DP32(flags, TB_FLAGS, BCFI_ENABLED, 1); + } + #ifdef CONFIG_USER_ONLY fs = EXT_STATUS_DIRTY; vs = EXT_STATUS_DIRTY; @@ -546,6 +617,15 @@ void riscv_cpu_swap_hypervisor_regs(CPURISCVState *env) } bool current_virt = env->virt_enabled; + /* + * If zicfilp extension available and henvcfg.LPE = 1, + * then apply SPELP mask on mstatus + */ + if (env_archcpu(env)->cfg.ext_zicfilp && + get_field(env->henvcfg, HENVCFG_LPE)) { + mstatus_mask |= SSTATUS_SPELP; + } + g_assert(riscv_has_ext(env, RVH)); if (current_virt) { @@ -804,7 +884,7 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, target_ulong *fault_pte_addr, int access_type, int mmu_idx, bool first_stage, bool two_stage, - bool is_debug) + bool is_debug, bool is_probe) { /* * NOTE: the env->pc value visible here will not be @@ -818,6 +898,8 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, hwaddr ppn; int napot_bits = 0; target_ulong napot_mask; + bool is_sstack_idx = ((mmu_idx & MMU_IDX_SS_WRITE) == MMU_IDX_SS_WRITE); + bool sstack_page = false; /* * Check if we should use the background registers for the two @@ -890,12 +972,14 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, CPUState *cs = env_cpu(env); int va_bits = PGSHIFT + levels * ptidxbits + widened; + int sxlen = 16 << riscv_cpu_sxl(env); + int sxlen_bytes = sxlen / 8; if (first_stage == true) { target_ulong mask, masked_msbs; - if (TARGET_LONG_BITS > (va_bits - 1)) { - mask = (1L << (TARGET_LONG_BITS - (va_bits - 1))) - 1; + if (sxlen > (va_bits - 1)) { + mask = (1L << (sxlen - (va_bits - 1))) - 1; } else { mask = 0; } @@ -948,7 +1032,7 @@ restart: int vbase_ret = get_physical_address(env, &vbase, &vbase_prot, base, NULL, MMU_DATA_LOAD, MMUIdx_U, false, true, - is_debug); + is_debug, false); if (vbase_ret != TRANSLATE_SUCCESS) { if (fault_pte_addr) { @@ -964,7 +1048,7 @@ restart: int pmp_prot; int pmp_ret = get_physical_address_pmp(env, &pmp_prot, pte_addr, - sizeof(target_ulong), + sxlen_bytes, MMU_DATA_LOAD, PRV_S); if (pmp_ret != TRANSLATE_SUCCESS) { return TRANSLATE_PMP_FAIL; @@ -1026,21 +1110,43 @@ restart: return TRANSLATE_FAIL; } + target_ulong rwx = pte & (PTE_R | PTE_W | PTE_X); /* Check for reserved combinations of RWX flags. */ - switch (pte & (PTE_R | PTE_W | PTE_X)) { - case PTE_W: + switch (rwx) { case PTE_W | PTE_X: return TRANSLATE_FAIL; + case PTE_W: + /* if bcfi enabled, PTE_W is not reserved and shadow stack page */ + if (cpu_get_bcfien(env) && first_stage) { + sstack_page = true; + /* + * if ss index, read and write allowed. else if not a probe + * then only read allowed + */ + rwx = is_sstack_idx ? (PTE_R | PTE_W) : (is_probe ? 0 : PTE_R); + break; + } + return TRANSLATE_FAIL; + case PTE_R: + /* + * no matter what's the `access_type`, shadow stack access to readonly + * memory are always store page faults. During unwind, loads will be + * promoted as store fault. 
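The RWX remapping above in one small, self-contained function: a PTE with only W set denotes a shadow-stack page when backward CFI is active, which regular loads may read but only shadow-stack accesses may write, while W|X stays reserved. The probe special case from the patch is omitted for brevity; bit values follow the RISC-V PTE layout.

#include <stdbool.h>
#include <stdint.h>

#define P_R 0x2u
#define P_W 0x4u
#define P_X 0x8u

static uint32_t effective_rwx(uint32_t pte_rwx, bool bcfi, bool ss_access)
{
    switch (pte_rwx & (P_R | P_W | P_X)) {
    case P_W:                               /* shadow-stack encoding */
        return !bcfi ? 0 : (ss_access ? (P_R | P_W) : P_R);
    case P_W | P_X:
        return 0;                           /* always reserved */
    default:
        return pte_rwx;                     /* normal encodings unchanged */
    }
}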
+ */ + if (is_sstack_idx) { + return TRANSLATE_FAIL; + } + break; } int prot = 0; - if (pte & PTE_R) { + if (rwx & PTE_R) { prot |= PAGE_READ; } - if (pte & PTE_W) { + if (rwx & PTE_W) { prot |= PAGE_WRITE; } - if (pte & PTE_X) { + if (rwx & PTE_X) { bool mxr = false; /* @@ -1084,8 +1190,11 @@ restart: } if (!((prot >> access_type) & 1)) { - /* Access check failed */ - return TRANSLATE_FAIL; + /* + * Access check failed, access check failures for shadow stack are + * access faults. + */ + return sstack_page ? TRANSLATE_PMP_FAIL : TRANSLATE_FAIL; } target_ulong updated_pte = pte; @@ -1116,7 +1225,7 @@ restart: * it is no longer valid and we must re-walk the page table. */ MemoryRegion *mr; - hwaddr l = sizeof(target_ulong), addr1; + hwaddr l = sxlen_bytes, addr1; mr = address_space_translate(cs->as, pte_addr, &addr1, &l, false, MEMTXATTRS_UNSPECIFIED); if (memory_region_is_ram(mr)) { @@ -1128,7 +1237,12 @@ restart: */ *pte_pa = pte = updated_pte; #else - target_ulong old_pte = qatomic_cmpxchg(pte_pa, pte, updated_pte); + target_ulong old_pte; + if (riscv_cpu_sxl(env) == MXL_RV32) { + old_pte = qatomic_cmpxchg((uint32_t *)pte_pa, pte, updated_pte); + } else { + old_pte = qatomic_cmpxchg(pte_pa, pte, updated_pte); + } if (old_pte != pte) { goto restart; } @@ -1223,13 +1337,13 @@ hwaddr riscv_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) int mmu_idx = riscv_env_mmu_index(&cpu->env, false); if (get_physical_address(env, &phys_addr, &prot, addr, NULL, 0, mmu_idx, - true, env->virt_enabled, true)) { + true, env->virt_enabled, true, false)) { return -1; } if (env->virt_enabled) { if (get_physical_address(env, &phys_addr, &prot, phys_addr, NULL, - 0, MMUIdx_U, false, true, true)) { + 0, MMUIdx_U, false, true, true, false)) { return -1; } } @@ -1272,9 +1386,17 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr, break; case MMU_DATA_LOAD: cs->exception_index = RISCV_EXCP_LOAD_ADDR_MIS; + /* shadow stack mis aligned accesses are access faults */ + if (mmu_idx & MMU_IDX_SS_WRITE) { + cs->exception_index = RISCV_EXCP_LOAD_ACCESS_FAULT; + } break; case MMU_DATA_STORE: cs->exception_index = RISCV_EXCP_STORE_AMO_ADDR_MIS; + /* shadow stack mis aligned accesses are access faults */ + if (mmu_idx & MMU_IDX_SS_WRITE) { + cs->exception_index = RISCV_EXCP_STORE_AMO_ACCESS_FAULT; + } break; default: g_assert_not_reached(); @@ -1335,7 +1457,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, /* Two stage lookup */ ret = get_physical_address(env, &pa, &prot, address, &env->guest_phys_fault_addr, access_type, - mmu_idx, true, true, false); + mmu_idx, true, true, false, probe); /* * A G-stage exception may be triggered during two state lookup. 
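A sketch of why the A/D-bit update above switched from a fixed target_ulong cmpxchg to an SXLEN-sized one: with Sv32 the in-memory PTE is 4 bytes, so a 64-bit compare-and-swap would clobber the neighbouring PTE. GCC builtins stand in for QEMU's qatomic helpers; on mismatch the caller is expected to restart the walk.

#include <stdbool.h>
#include <stdint.h>

static bool update_pte(void *pte_pa, uint64_t old_pte, uint64_t new_pte,
                       int sxlen)
{
    if (sxlen == 32) {
        uint32_t expected = old_pte;
        return __atomic_compare_exchange_n((uint32_t *)pte_pa, &expected,
                                           (uint32_t)new_pte, false,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }
    uint64_t expected = old_pte;
    return __atomic_compare_exchange_n((uint64_t *)pte_pa, &expected, new_pte,
                                       false, __ATOMIC_SEQ_CST,
                                       __ATOMIC_SEQ_CST);
}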
@@ -1358,7 +1480,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, ret = get_physical_address(env, &pa, &prot2, im_address, NULL, access_type, MMUIdx_U, false, true, - false); + false, probe); qemu_log_mask(CPU_LOG_MMU, "%s 2nd-stage address=%" VADDR_PRIx @@ -1395,7 +1517,8 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int size, } else { /* Single stage lookup */ ret = get_physical_address(env, &pa, &prot, address, NULL, - access_type, mmu_idx, true, false, false); + access_type, mmu_idx, true, false, false, + probe); qemu_log_mask(CPU_LOG_MMU, "%s address=%" VADDR_PRIx " ret %d physical " @@ -1641,6 +1764,22 @@ static target_ulong riscv_transformed_insn(CPURISCVState *env, return xinsn; } +static target_ulong promote_load_fault(target_ulong orig_cause) +{ + switch (orig_cause) { + case RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT: + return RISCV_EXCP_STORE_GUEST_AMO_ACCESS_FAULT; + + case RISCV_EXCP_LOAD_ACCESS_FAULT: + return RISCV_EXCP_STORE_AMO_ACCESS_FAULT; + + case RISCV_EXCP_LOAD_PAGE_FAULT: + return RISCV_EXCP_STORE_PAGE_FAULT; + } + + /* if no promotion, return original cause */ + return orig_cause; +} /* * Handle Traps * @@ -1653,6 +1792,7 @@ void riscv_cpu_do_interrupt(CPUState *cs) CPURISCVState *env = &cpu->env; bool virt = env->virt_enabled; bool write_gva = false; + bool always_storeamo = (env->excp_uw2 & RISCV_UW2_ALWAYS_STORE_AMO); uint64_t s; /* @@ -1670,6 +1810,8 @@ void riscv_cpu_do_interrupt(CPUState *cs) target_ulong tinst = 0; target_ulong htval = 0; target_ulong mtval2 = 0; + int sxlen = 0; + int mxlen = 0; if (!async) { /* set tval to badaddr for traps with address information */ @@ -1688,6 +1830,9 @@ void riscv_cpu_do_interrupt(CPUState *cs) case RISCV_EXCP_STORE_AMO_ACCESS_FAULT: case RISCV_EXCP_LOAD_PAGE_FAULT: case RISCV_EXCP_STORE_PAGE_FAULT: + if (always_storeamo) { + cause = promote_load_fault(cause); + } write_gva = env->two_stage_lookup; tval = env->badaddr; if (env->two_stage_indirect_lookup) { @@ -1729,6 +1874,9 @@ void riscv_cpu_do_interrupt(CPUState *cs) cs->watchpoint_hit = NULL; } break; + case RISCV_EXCP_SW_CHECK: + tval = env->sw_check_code; + break; default: break; } @@ -1760,6 +1908,11 @@ void riscv_cpu_do_interrupt(CPUState *cs) if (env->priv <= PRV_S && cause < 64 && (((deleg >> cause) & 1) || s_injected || vs_injected)) { /* handle the trap in S-mode */ + /* save elp status */ + if (cpu_get_fcfien(env)) { + env->mstatus = set_field(env->mstatus, MSTATUS_SPELP, env->elp); + } + if (riscv_has_ext(env, RVH)) { uint64_t hdeleg = async ? 
env->hideleg : env->hedeleg; @@ -1798,7 +1951,8 @@ void riscv_cpu_do_interrupt(CPUState *cs) s = set_field(s, MSTATUS_SPP, env->priv); s = set_field(s, MSTATUS_SIE, 0); env->mstatus = s; - env->scause = cause | ((target_ulong)async << (TARGET_LONG_BITS - 1)); + sxlen = 16 << riscv_cpu_sxl(env); + env->scause = cause | ((target_ulong)async << (sxlen - 1)); env->sepc = env->pc; env->stval = tval; env->htval = htval; @@ -1808,6 +1962,11 @@ void riscv_cpu_do_interrupt(CPUState *cs) riscv_cpu_set_mode(env, PRV_S, virt); } else { /* handle the trap in M-mode */ + /* save elp status */ + if (cpu_get_fcfien(env)) { + env->mstatus = set_field(env->mstatus, MSTATUS_MPELP, env->elp); + } + if (riscv_has_ext(env, RVH)) { if (env->virt_enabled) { riscv_cpu_swap_hypervisor_regs(env); @@ -1829,7 +1988,8 @@ void riscv_cpu_do_interrupt(CPUState *cs) s = set_field(s, MSTATUS_MPP, env->priv); s = set_field(s, MSTATUS_MIE, 0); env->mstatus = s; - env->mcause = cause | ~(((target_ulong)-1) >> async); + mxlen = 16 << riscv_cpu_mxl(env); + env->mcause = cause | ((target_ulong)async << (mxlen - 1)); env->mepc = env->pc; env->mtval = tval; env->mtval2 = mtval2; @@ -1839,6 +1999,13 @@ void riscv_cpu_do_interrupt(CPUState *cs) riscv_cpu_set_mode(env, PRV_M, virt); } + /* + * Interrupt/exception/trap delivery is asynchronous event and as per + * zicfilp spec CPU should clear up the ELP state. No harm in clearing + * unconditionally. + */ + env->elp = false; + /* * NOTE: it is not necessary to yield load reservations here. It is only * necessary for an SC from "another hart" to cause a load reservation diff --git a/target/riscv/cpu_user.h b/target/riscv/cpu_user.h index 02afad608b..e6927ff847 100644 --- a/target/riscv/cpu_user.h +++ b/target/riscv/cpu_user.h @@ -15,5 +15,6 @@ #define xA6 16 #define xA7 17 /* syscall number for RVI ABI */ #define xT0 5 /* syscall number for RVE ABI */ +#define xT2 7 #endif diff --git a/target/riscv/csr.c b/target/riscv/csr.c index ea3560342c..9846770820 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -184,6 +184,25 @@ static RISCVException zcmt(CPURISCVState *env, int csrno) return RISCV_EXCP_NONE; } +static RISCVException cfi_ss(CPURISCVState *env, int csrno) +{ + if (!env_archcpu(env)->cfg.ext_zicfiss) { + return RISCV_EXCP_ILLEGAL_INST; + } + + /* if bcfi not active for current env, access to csr is illegal */ + if (!cpu_get_bcfien(env)) { +#if !defined(CONFIG_USER_ONLY) + if (env->debugger) { + return RISCV_EXCP_NONE; + } +#endif + return RISCV_EXCP_ILLEGAL_INST; + } + + return RISCV_EXCP_NONE; +} + #if !defined(CONFIG_USER_ONLY) static RISCVException mctr(CPURISCVState *env, int csrno) { @@ -622,6 +641,19 @@ static RISCVException seed(CPURISCVState *env, int csrno) #endif } +/* zicfiss CSR_SSP read and write */ +static int read_ssp(CPURISCVState *env, int csrno, target_ulong *val) +{ + *val = env->ssp; + return RISCV_EXCP_NONE; +} + +static int write_ssp(CPURISCVState *env, int csrno, target_ulong val) +{ + env->ssp = val; + return RISCV_EXCP_NONE; +} + /* User Floating-Point CSRs */ static RISCVException read_fflags(CPURISCVState *env, int csrno, target_ulong *val) @@ -734,7 +766,7 @@ static RISCVException write_vxrm(CPURISCVState *env, int csrno, static RISCVException read_vxsat(CPURISCVState *env, int csrno, target_ulong *val) { - *val = env->vxsat; + *val = env->vxsat & BIT(0); return RISCV_EXCP_NONE; } @@ -744,7 +776,7 @@ static RISCVException write_vxsat(CPURISCVState *env, int csrno, #if !defined(CONFIG_USER_ONLY) env->mstatus |= MSTATUS_VS; #endif - env->vxsat 
= val; + env->vxsat = val & BIT(0); return RISCV_EXCP_NONE; } @@ -1377,6 +1409,7 @@ static const uint64_t all_ints = M_MODE_INTERRUPTS | S_MODE_INTERRUPTS | (1ULL << (RISCV_EXCP_INST_PAGE_FAULT)) | \ (1ULL << (RISCV_EXCP_LOAD_PAGE_FAULT)) | \ (1ULL << (RISCV_EXCP_STORE_PAGE_FAULT)) | \ + (1ULL << (RISCV_EXCP_SW_CHECK)) | \ (1ULL << (RISCV_EXCP_INST_GUEST_PAGE_FAULT)) | \ (1ULL << (RISCV_EXCP_LOAD_GUEST_ACCESS_FAULT)) | \ (1ULL << (RISCV_EXCP_VIRT_INSTRUCTION_FAULT)) | \ @@ -1598,6 +1631,11 @@ static RISCVException write_mstatus(CPURISCVState *env, int csrno, } } + /* If cfi lp extension is available, then apply cfi lp mask */ + if (env_archcpu(env)->cfg.ext_zicfilp) { + mask |= (MSTATUS_MPELP | MSTATUS_SPELP); + } + mstatus = (mstatus & ~mask) | (val & mask); env->mstatus = mstatus; @@ -2344,6 +2382,14 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | (cfg->ext_sstc ? MENVCFG_STCE : 0) | (cfg->ext_svadu ? MENVCFG_ADUE : 0); + + if (env_archcpu(env)->cfg.ext_zicfilp) { + mask |= MENVCFG_LPE; + } + + if (env_archcpu(env)->cfg.ext_zicfiss) { + mask |= MENVCFG_SSE; + } } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); @@ -2396,6 +2442,17 @@ static RISCVException write_senvcfg(CPURISCVState *env, int csrno, return ret; } + if (env_archcpu(env)->cfg.ext_zicfilp) { + mask |= SENVCFG_LPE; + } + + /* Higher mode SSE must be ON for next-less mode SSE to be ON */ + if (env_archcpu(env)->cfg.ext_zicfiss && + get_field(env->menvcfg, MENVCFG_SSE) && + (env->virt_enabled ? get_field(env->henvcfg, HENVCFG_SSE) : true)) { + mask |= SENVCFG_SSE; + } + env->senvcfg = (env->senvcfg & ~mask) | (val & mask); return RISCV_EXCP_NONE; } @@ -2433,6 +2490,16 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE); + + if (env_archcpu(env)->cfg.ext_zicfilp) { + mask |= HENVCFG_LPE; + } + + /* H can light up SSE for VS only if HS had it from menvcfg */ + if (env_archcpu(env)->cfg.ext_zicfiss && + get_field(env->menvcfg, MENVCFG_SSE)) { + mask |= HENVCFG_SSE; + } } env->henvcfg = (env->henvcfg & ~mask) | (val & mask); @@ -2897,6 +2964,10 @@ static RISCVException read_sstatus_i128(CPURISCVState *env, int csrno, mask |= SSTATUS64_UXL; } + if (env_archcpu(env)->cfg.ext_zicfilp) { + mask |= SSTATUS_SPELP; + } + *val = int128_make128(sstatus, add_status_sd(MXL_RV128, sstatus)); return RISCV_EXCP_NONE; } @@ -2908,6 +2979,11 @@ static RISCVException read_sstatus(CPURISCVState *env, int csrno, if (env->xl != MXL_RV32 || env->debugger) { mask |= SSTATUS64_UXL; } + + if (env_archcpu(env)->cfg.ext_zicfilp) { + mask |= SSTATUS_SPELP; + } + /* TODO: Use SXL not MXL. 
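The hierarchical gating applied in write_senvcfg()/write_henvcfg() above, reduced to a standalone helper: senvcfg.SSE is only writable while menvcfg.SSE is set and, when running virtualized, henvcfg.SSE as well; otherwise the bit is masked off and stays zero. A sketch with local names, not the CSR code itself.

#include <stdbool.h>
#include <stdint.h>

#define ENVCFG_SSE (1u << 3)

static uint32_t write_senvcfg_sse(uint32_t senvcfg, uint32_t val, bool virt,
                                  uint32_t menvcfg, uint32_t henvcfg)
{
    uint32_t mask = 0;

    if ((menvcfg & ENVCFG_SSE) && (!virt || (henvcfg & ENVCFG_SSE))) {
        mask |= ENVCFG_SSE;                 /* writable only in this case */
    }
    return (senvcfg & ~mask) | (val & mask);
}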
*/ *val = add_status_sd(riscv_cpu_mxl(env), env->mstatus & mask); return RISCV_EXCP_NONE; @@ -2923,6 +2999,11 @@ static RISCVException write_sstatus(CPURISCVState *env, int csrno, mask |= SSTATUS64_UXL; } } + + if (env_archcpu(env)->cfg.ext_zicfilp) { + mask |= SSTATUS_SPELP; + } + target_ulong newval = (env->mstatus & ~mask) | (val & mask); return write_mstatus(env, CSR_MSTATUS, newval); } @@ -4934,6 +5015,9 @@ riscv_csr_operations csr_ops[CSR_TABLE_SIZE] = { /* Zcmt Extension */ [CSR_JVT] = {"jvt", zcmt, read_jvt, write_jvt}, + /* zicfiss Extension, shadow stack register */ + [CSR_SSP] = { "ssp", cfi_ss, read_ssp, write_ssp }, + #if !defined(CONFIG_USER_ONLY) /* Machine Timers and Counters */ [CSR_MCYCLE] = { "mcycle", any, read_hpmcounter, diff --git a/target/riscv/insn16.decode b/target/riscv/insn16.decode index 3953bcf82d..bf893d1c2e 100644 --- a/target/riscv/insn16.decode +++ b/target/riscv/insn16.decode @@ -140,6 +140,10 @@ sw 110 ... ... .. ... 00 @cs_w addi 000 . ..... ..... 01 @ci addi 010 . ..... ..... 01 @c_li { + # c.sspush x1 carving out of zcmops + sspush 011 0 00001 00000 01 &r2_s rs2=1 rs1=0 + # c.sspopchk x5 carving out of zcmops + sspopchk 011 0 00101 00000 01 &r2 rs1=5 rd=0 c_mop_n 011 0 0 n:3 1 00000 01 illegal 011 0 ----- 00000 01 # c.addi16sp and c.lui, RES nzimm=0 addi 011 . 00010 ..... 01 @c_addi16sp diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index c45b8fa1d8..e9139ec1b9 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -123,7 +123,10 @@ sfence_vm 0001000 00100 ..... 000 00000 1110011 @sfence_vm # *** RV32I Base Instruction Set *** lui .................... ..... 0110111 @u -auipc .................... ..... 0010111 @u +{ + lpad label:20 00000 0010111 + auipc .................... ..... 0010111 @u +} jal .................... ..... 1101111 @j jalr ............ ..... 000 ..... 1100111 @i beq ....... ..... ..... 000 ..... 1100011 @b @@ -243,6 +246,7 @@ remud 0000001 ..... ..... 111 ..... 1111011 @r lr_w 00010 . . 00000 ..... 010 ..... 0101111 @atom_ld sc_w 00011 . . ..... ..... 010 ..... 0101111 @atom_st amoswap_w 00001 . . ..... ..... 010 ..... 0101111 @atom_st +ssamoswap_w 01001 . . ..... ..... 010 ..... 0101111 @atom_st amoadd_w 00000 . . ..... ..... 010 ..... 0101111 @atom_st amoxor_w 00100 . . ..... ..... 010 ..... 0101111 @atom_st amoand_w 01100 . . ..... ..... 010 ..... 0101111 @atom_st @@ -256,6 +260,7 @@ amomaxu_w 11100 . . ..... ..... 010 ..... 0101111 @atom_st lr_d 00010 . . 00000 ..... 011 ..... 0101111 @atom_ld sc_d 00011 . . ..... ..... 011 ..... 0101111 @atom_st amoswap_d 00001 . . ..... ..... 011 ..... 0101111 @atom_st +ssamoswap_d 01001 . . ..... ..... 011 ..... 0101111 @atom_st amoadd_d 00000 . . ..... ..... 011 ..... 0101111 @atom_st amoxor_d 00100 . . ..... ..... 011 ..... 0101111 @atom_st amoand_d 01100 . . ..... ..... 011 ..... 0101111 @atom_st @@ -1019,8 +1024,23 @@ amocas_d 00101 . . ..... ..... 011 ..... 0101111 @atom_st amocas_q 00101 . . ..... ..... 100 ..... 0101111 @atom_st # *** Zimop may-be-operation extension *** -mop_r_n 1 . 00 .. 0111 .. ..... 100 ..... 1110011 @mop5 -mop_rr_n 1 . 00 .. 1 ..... ..... 100 ..... 1110011 @mop3 +{ + # zicfiss instructions carved out of mop.r + [ + ssrdp 1100110 11100 00000 100 rd:5 1110011 + sspopchk 1100110 11100 00001 100 00000 1110011 &r2 rs1=1 rd=0 + sspopchk 1100110 11100 00101 100 00000 1110011 &r2 rs1=5 rd=0 + ] + mop_r_n 1 . 00 .. 0111 .. ..... 100 ..... 
1110011 @mop5 +} +{ + # zicfiss instruction carved out of mop.rr + [ + sspush 1100111 00001 00000 100 00000 1110011 &r2_s rs2=1 rs1=0 + sspush 1100111 00101 00000 100 00000 1110011 &r2_s rs2=5 rs1=0 + ] + mop_rr_n 1 . 00 .. 1 ..... ..... 100 ..... 1110011 @mop3 +} # *** Zabhb Standard Extension *** amoswap_b 00001 . . ..... ..... 000 ..... 0101111 @atom_st diff --git a/target/riscv/insn_trans/trans_privileged.c.inc b/target/riscv/insn_trans/trans_privileged.c.inc index bc5263a4e0..ecd3b8b2c9 100644 --- a/target/riscv/insn_trans/trans_privileged.c.inc +++ b/target/riscv/insn_trans/trans_privileged.c.inc @@ -78,7 +78,7 @@ static bool trans_sret(DisasContext *ctx, arg_sret *a) { #ifndef CONFIG_USER_ONLY if (has_ext(ctx, RVS)) { - decode_save_opc(ctx); + decode_save_opc(ctx, 0); translator_io_start(&ctx->base); gen_helper_sret(cpu_pc, tcg_env); exit_tb(ctx); /* no chaining */ @@ -95,7 +95,7 @@ static bool trans_sret(DisasContext *ctx, arg_sret *a) static bool trans_mret(DisasContext *ctx, arg_mret *a) { #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); translator_io_start(&ctx->base); gen_helper_mret(cpu_pc, tcg_env); exit_tb(ctx); /* no chaining */ @@ -109,7 +109,7 @@ static bool trans_mret(DisasContext *ctx, arg_mret *a) static bool trans_wfi(DisasContext *ctx, arg_wfi *a) { #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_update_pc(ctx, ctx->cur_insn_len); gen_helper_wfi(tcg_env); return true; @@ -121,7 +121,7 @@ static bool trans_wfi(DisasContext *ctx, arg_wfi *a) static bool trans_sfence_vma(DisasContext *ctx, arg_sfence_vma *a) { #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_tlb_flush(tcg_env); return true; #endif diff --git a/target/riscv/insn_trans/trans_rva.c.inc b/target/riscv/insn_trans/trans_rva.c.inc index 39bbf60f3c..9cf3ae8019 100644 --- a/target/riscv/insn_trans/trans_rva.c.inc +++ b/target/riscv/insn_trans/trans_rva.c.inc @@ -34,7 +34,7 @@ static bool gen_lr(DisasContext *ctx, arg_atomic *a, MemOp mop) { TCGv src1; - decode_save_opc(ctx); + decode_save_opc(ctx, 0); src1 = get_address(ctx, a->rs1, 0); if (a->rl) { tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL); @@ -61,7 +61,7 @@ static bool gen_sc(DisasContext *ctx, arg_atomic *a, MemOp mop) TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); - decode_save_opc(ctx); + decode_save_opc(ctx, 0); src1 = get_address(ctx, a->rs1, 0); tcg_gen_brcond_tl(TCG_COND_NE, load_res, src1, l1); diff --git a/target/riscv/insn_trans/trans_rvd.c.inc b/target/riscv/insn_trans/trans_rvd.c.inc index 8a46124f98..30883ea37c 100644 --- a/target/riscv/insn_trans/trans_rvd.c.inc +++ b/target/riscv/insn_trans/trans_rvd.c.inc @@ -61,7 +61,7 @@ static bool trans_fld(DisasContext *ctx, arg_fld *a) memop |= MO_ATOM_IFALIGN; } - decode_save_opc(ctx); + decode_save_opc(ctx, 0); addr = get_address(ctx, a->rs1, a->imm); tcg_gen_qemu_ld_i64(cpu_fpr[a->rd], addr, ctx->mem_idx, memop); @@ -85,7 +85,7 @@ static bool trans_fsd(DisasContext *ctx, arg_fsd *a) memop |= MO_ATOM_IFALIGN; } - decode_save_opc(ctx); + decode_save_opc(ctx, 0); addr = get_address(ctx, a->rs1, a->imm); tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], addr, ctx->mem_idx, memop); return true; diff --git a/target/riscv/insn_trans/trans_rvf.c.inc b/target/riscv/insn_trans/trans_rvf.c.inc index 0222a728df..ed73afe089 100644 --- a/target/riscv/insn_trans/trans_rvf.c.inc +++ b/target/riscv/insn_trans/trans_rvf.c.inc @@ -52,7 +52,7 @@ static bool trans_flw(DisasContext *ctx, arg_flw *a) memop |= MO_ATOM_WITHIN16; } 
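A quick standalone check of the decodetree overlay above: lpad reuses the auipc major opcode (0b0010111) and is selected purely by rd == x0, so any auipc with a non-zero destination still decodes as auipc. The helper below is hypothetical and only illustrates the encoding split.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool is_lpad(uint32_t insn)
{
    return (insn & 0x7f) == 0x17 &&         /* auipc/lpad major opcode */
           ((insn >> 7) & 0x1f) == 0;       /* rd must be x0 */
}

int main(void)
{
    assert(is_lpad(0x00000017));            /* lpad 0 */
    assert(!is_lpad(0x00000097));           /* auipc ra, 0 */
    return 0;
}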
- decode_save_opc(ctx); + decode_save_opc(ctx, 0); addr = get_address(ctx, a->rs1, a->imm); dest = cpu_fpr[a->rd]; tcg_gen_qemu_ld_i64(dest, addr, ctx->mem_idx, memop); @@ -74,7 +74,7 @@ static bool trans_fsw(DisasContext *ctx, arg_fsw *a) memop |= MO_ATOM_WITHIN16; } - decode_save_opc(ctx); + decode_save_opc(ctx, 0); addr = get_address(ctx, a->rs1, a->imm); tcg_gen_qemu_st_i64(cpu_fpr[a->rs2], addr, ctx->mem_idx, memop); return true; diff --git a/target/riscv/insn_trans/trans_rvh.c.inc b/target/riscv/insn_trans/trans_rvh.c.inc index aa9d41c18c..03c6694430 100644 --- a/target/riscv/insn_trans/trans_rvh.c.inc +++ b/target/riscv/insn_trans/trans_rvh.c.inc @@ -44,7 +44,7 @@ static bool do_hlv(DisasContext *ctx, arg_r2 *a, TCGv dest = dest_gpr(ctx, a->rd); TCGv addr = get_gpr(ctx, a->rs1, EXT_NONE); - decode_save_opc(ctx); + decode_save_opc(ctx, 0); func(dest, tcg_env, addr); gen_set_gpr(ctx, a->rd, dest); return true; @@ -56,7 +56,7 @@ static bool do_hsv(DisasContext *ctx, arg_r2_s *a, TCGv addr = get_gpr(ctx, a->rs1, EXT_NONE); TCGv data = get_gpr(ctx, a->rs2, EXT_NONE); - decode_save_opc(ctx); + decode_save_opc(ctx, 0); func(tcg_env, addr, data); return true; } @@ -147,7 +147,7 @@ static bool trans_hfence_gvma(DisasContext *ctx, arg_sfence_vma *a) { REQUIRE_EXT(ctx, RVH); #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_hyp_gvma_tlb_flush(tcg_env); return true; #endif @@ -158,7 +158,7 @@ static bool trans_hfence_vvma(DisasContext *ctx, arg_sfence_vma *a) { REQUIRE_EXT(ctx, RVH); #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_hyp_tlb_flush(tcg_env); return true; #endif diff --git a/target/riscv/insn_trans/trans_rvi.c.inc b/target/riscv/insn_trans/trans_rvi.c.inc index fab5c06719..96c218a9d7 100644 --- a/target/riscv/insn_trans/trans_rvi.c.inc +++ b/target/riscv/insn_trans/trans_rvi.c.inc @@ -36,6 +36,49 @@ static bool trans_lui(DisasContext *ctx, arg_lui *a) return true; } +static bool trans_lpad(DisasContext *ctx, arg_lpad *a) +{ + /* + * fcfi_lp_expected can set only if fcfi was eanbled. + * translate further only if fcfi_lp_expected set. + * lpad comes from NOP space anyways, so return true if + * fcfi_lp_expected is false. 
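The arithmetic behind the label check that trans_lpad() performs below: the 20-bit immediate of lpad must equal bits [31:12] of x7/t2, i.e. the value a caller establishes with lui t2, <label>. Standalone illustration with local names only.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool lpad_label_matches(uint64_t t2, uint32_t label20)
{
    return ((t2 >> 12) & 0xfffff) == (label20 & 0xfffff);
}

int main(void)
{
    /* caller: lui t2, 0xabcde ; callee entry: lpad 0xabcde */
    assert(lpad_label_matches(0xabcdeull << 12, 0xabcde));
    assert(!lpad_label_matches(0x12345ull << 12, 0xabcde));
    return 0;
}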
+ */ + if (!ctx->fcfi_lp_expected) { + return true; + } + + ctx->fcfi_lp_expected = false; + if ((ctx->base.pc_next) & 0x3) { + /* + * misaligned, according to spec we should raise sw check exception + */ + tcg_gen_st_tl(tcg_constant_tl(RISCV_EXCP_SW_CHECK_FCFI_TVAL), + tcg_env, offsetof(CPURISCVState, sw_check_code)); + gen_helper_raise_exception(tcg_env, + tcg_constant_i32(RISCV_EXCP_SW_CHECK)); + return true; + } + + /* per spec, label check performed only when embedded label non-zero */ + if (a->label != 0) { + TCGLabel *skip = gen_new_label(); + TCGv tmp = tcg_temp_new(); + tcg_gen_extract_tl(tmp, get_gpr(ctx, xT2, EXT_NONE), 12, 20); + tcg_gen_brcondi_tl(TCG_COND_EQ, tmp, a->label, skip); + tcg_gen_st_tl(tcg_constant_tl(RISCV_EXCP_SW_CHECK_FCFI_TVAL), + tcg_env, offsetof(CPURISCVState, sw_check_code)); + gen_helper_raise_exception(tcg_env, + tcg_constant_i32(RISCV_EXCP_SW_CHECK)); + gen_set_label(skip); + } + + tcg_gen_st8_tl(tcg_constant_tl(0), tcg_env, + offsetof(CPURISCVState, elp)); + + return true; +} + static bool trans_auipc(DisasContext *ctx, arg_auipc *a) { TCGv target_pc = dest_gpr(ctx, a->rd); @@ -75,6 +118,18 @@ static bool trans_jalr(DisasContext *ctx, arg_jalr *a) gen_set_gpr(ctx, a->rd, succ_pc); tcg_gen_mov_tl(cpu_pc, target_pc); + if (ctx->fcfi_enabled) { + /* + * return from functions (i.e. rs1 == xRA || rs1 == xT0) are not + * tracked. zicfilp introduces sw guarded branch as well. sw guarded + * branch are not tracked. rs1 == xT2 is a sw guarded branch. + */ + if (a->rs1 != xRA && a->rs1 != xT0 && a->rs1 != xT2) { + tcg_gen_st8_tl(tcg_constant_tl(1), + tcg_env, offsetof(CPURISCVState, elp)); + } + } + lookup_and_goto_ptr(ctx); if (misaligned) { @@ -271,7 +326,7 @@ static bool gen_load(DisasContext *ctx, arg_lb *a, MemOp memop) if (ctx->cfg_ptr->ext_zama16b) { memop |= MO_ATOM_WITHIN16; } - decode_save_opc(ctx); + decode_save_opc(ctx, 0); if (get_xl(ctx) == MXL_RV128) { out = gen_load_i128(ctx, a, memop); } else { @@ -372,7 +427,7 @@ static bool gen_store(DisasContext *ctx, arg_sb *a, MemOp memop) if (ctx->cfg_ptr->ext_zama16b) { memop |= MO_ATOM_WITHIN16; } - decode_save_opc(ctx); + decode_save_opc(ctx, 0); if (get_xl(ctx) == MXL_RV128) { return gen_store_i128(ctx, a, memop); } else { @@ -834,7 +889,7 @@ static bool trans_fence_i(DisasContext *ctx, arg_fence_i *a) static bool do_csr_post(DisasContext *ctx) { /* The helper may raise ILLEGAL_INSN -- record binv for unwind. */ - decode_save_opc(ctx); + decode_save_opc(ctx, 0); /* We may have changed important cpu state -- exit to main loop. 
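The tracking rule from trans_jalr() above as a one-line predicate: only indirect jumps whose rs1 is not ra (x1), t0 (x5) or t2 (x7) arm the expected-landing-pad state; returns and software-guarded branches stay exempt. Sketch only, register numbers per the RISC-V ABI.

#include <stdbool.h>

static bool jalr_sets_elp(int rs1, bool fcfi_enabled)
{
    return fcfi_enabled && rs1 != 1 && rs1 != 5 && rs1 != 7;  /* ra, t0, t2 */
}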
*/ gen_update_pc(ctx, ctx->cur_insn_len); exit_tb(ctx); diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index ae1f40174a..27bf3f0b68 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -249,7 +249,7 @@ GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) \ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ /* save opcode for unwinding in case we throw an exception */ \ - decode_save_opc(s); \ + decode_save_opc(s, 0); \ egs = tcg_constant_i32(EGS); \ gen_helper_egs_check(egs, tcg_env); \ } \ @@ -322,7 +322,7 @@ GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS) \ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ /* save opcode for unwinding in case we throw an exception */ \ - decode_save_opc(s); \ + decode_save_opc(s, 0); \ egs = tcg_constant_i32(EGS); \ gen_helper_egs_check(egs, tcg_env); \ } \ @@ -389,7 +389,7 @@ GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) \ if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ /* save opcode for unwinding in case we throw an exception */ \ - decode_save_opc(s); \ + decode_save_opc(s, 0); \ egs = tcg_constant_i32(EGS); \ gen_helper_egs_check(egs, tcg_env); \ } \ @@ -440,7 +440,7 @@ static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a) if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { /* save opcode for unwinding in case we throw an exception */ - decode_save_opc(s); + decode_save_opc(s, 0); egs = tcg_constant_i32(ZVKNH_EGS); gen_helper_egs_check(egs, tcg_env); } @@ -471,7 +471,7 @@ static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { /* save opcode for unwinding in case we throw an exception */ - decode_save_opc(s); + decode_save_opc(s, 0); egs = tcg_constant_i32(ZVKNH_EGS); gen_helper_egs_check(egs, tcg_env); } diff --git a/target/riscv/insn_trans/trans_rvzacas.c.inc b/target/riscv/insn_trans/trans_rvzacas.c.inc index fcced99fc7..15e688a033 100644 --- a/target/riscv/insn_trans/trans_rvzacas.c.inc +++ b/target/riscv/insn_trans/trans_rvzacas.c.inc @@ -76,7 +76,7 @@ static bool gen_cmpxchg64(DisasContext *ctx, arg_atomic *a, MemOp mop) TCGv src1 = get_address(ctx, a->rs1, 0); TCGv_i64 src2 = get_gpr_pair(ctx, a->rs2); - decode_save_opc(ctx); + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); tcg_gen_atomic_cmpxchg_i64(dest, src1, dest, src2, ctx->mem_idx, mop); gen_set_gpr_pair(ctx, a->rd, dest); @@ -121,7 +121,7 @@ static bool trans_amocas_q(DisasContext *ctx, arg_amocas_q *a) tcg_gen_concat_i64_i128(src2, src2l, src2h); tcg_gen_concat_i64_i128(dest, destl, desth); - decode_save_opc(ctx); + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); tcg_gen_atomic_cmpxchg_i128(dest, src1, dest, src2, ctx->mem_idx, (MO_ALIGN | MO_TEUO)); diff --git a/target/riscv/insn_trans/trans_rvzfh.c.inc b/target/riscv/insn_trans/trans_rvzfh.c.inc index 1eb458b491..bece48e600 100644 --- a/target/riscv/insn_trans/trans_rvzfh.c.inc +++ b/target/riscv/insn_trans/trans_rvzfh.c.inc @@ -48,7 +48,7 @@ static bool trans_flh(DisasContext *ctx, arg_flh *a) REQUIRE_FPU; REQUIRE_ZFHMIN_OR_ZFBFMIN(ctx); - decode_save_opc(ctx); + decode_save_opc(ctx, 0); t0 = get_gpr(ctx, a->rs1, EXT_NONE); if (a->imm) { TCGv temp = tcg_temp_new(); @@ -71,7 +71,7 @@ static bool trans_fsh(DisasContext *ctx, arg_fsh *a) REQUIRE_FPU; REQUIRE_ZFHMIN_OR_ZFBFMIN(ctx); - decode_save_opc(ctx); + decode_save_opc(ctx, 0); t0 = get_gpr(ctx, a->rs1, EXT_NONE); if (a->imm) { TCGv temp = tcg_temp_new(); diff --git 
a/target/riscv/insn_trans/trans_rvzicfiss.c.inc b/target/riscv/insn_trans/trans_rvzicfiss.c.inc new file mode 100644 index 0000000000..e3ebc4977c --- /dev/null +++ b/target/riscv/insn_trans/trans_rvzicfiss.c.inc @@ -0,0 +1,114 @@ +/* + * RISC-V translation routines for the Control-Flow Integrity Extension + * + * Copyright (c) 2024 Rivos Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +static bool trans_sspopchk(DisasContext *ctx, arg_sspopchk *a) +{ + if (!ctx->bcfi_enabled) { + return false; + } + + TCGv addr = tcg_temp_new(); + TCGLabel *skip = gen_new_label(); + uint32_t tmp = (get_xl(ctx) == MXL_RV64) ? 8 : 4; + TCGv data = tcg_temp_new(); + tcg_gen_ld_tl(addr, tcg_env, offsetof(CPURISCVState, ssp)); + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); + tcg_gen_qemu_ld_tl(data, addr, SS_MMU_INDEX(ctx), + mxl_memop(ctx) | MO_ALIGN); + TCGv rs1 = get_gpr(ctx, a->rs1, EXT_NONE); + tcg_gen_brcond_tl(TCG_COND_EQ, data, rs1, skip); + tcg_gen_st_tl(tcg_constant_tl(RISCV_EXCP_SW_CHECK_BCFI_TVAL), + tcg_env, offsetof(CPURISCVState, sw_check_code)); + gen_helper_raise_exception(tcg_env, + tcg_constant_i32(RISCV_EXCP_SW_CHECK)); + gen_set_label(skip); + tcg_gen_addi_tl(addr, addr, tmp); + tcg_gen_st_tl(addr, tcg_env, offsetof(CPURISCVState, ssp)); + + return true; +} + +static bool trans_sspush(DisasContext *ctx, arg_sspush *a) +{ + if (!ctx->bcfi_enabled) { + return false; + } + + TCGv addr = tcg_temp_new(); + int tmp = (get_xl(ctx) == MXL_RV64) ? 
-8 : -4; + TCGv data = get_gpr(ctx, a->rs2, EXT_NONE); + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); + tcg_gen_ld_tl(addr, tcg_env, offsetof(CPURISCVState, ssp)); + tcg_gen_addi_tl(addr, addr, tmp); + tcg_gen_qemu_st_tl(data, addr, SS_MMU_INDEX(ctx), + mxl_memop(ctx) | MO_ALIGN); + tcg_gen_st_tl(addr, tcg_env, offsetof(CPURISCVState, ssp)); + + return true; +} + +static bool trans_ssrdp(DisasContext *ctx, arg_ssrdp *a) +{ + if (!ctx->bcfi_enabled || a->rd == 0) { + return false; + } + + TCGv dest = dest_gpr(ctx, a->rd); + tcg_gen_ld_tl(dest, tcg_env, offsetof(CPURISCVState, ssp)); + gen_set_gpr(ctx, a->rd, dest); + + return true; +} + +static bool trans_ssamoswap_w(DisasContext *ctx, arg_amoswap_w *a) +{ + REQUIRE_A_OR_ZAAMO(ctx); + if (!ctx->bcfi_enabled) { + return false; + } + + TCGv dest = dest_gpr(ctx, a->rd); + TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE); + + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); + src1 = get_address(ctx, a->rs1, 0); + + tcg_gen_atomic_xchg_tl(dest, src1, src2, SS_MMU_INDEX(ctx), + (MO_ALIGN | MO_TESL)); + gen_set_gpr(ctx, a->rd, dest); + return true; +} + +static bool trans_ssamoswap_d(DisasContext *ctx, arg_amoswap_w *a) +{ + REQUIRE_64BIT(ctx); + REQUIRE_A_OR_ZAAMO(ctx); + if (!ctx->bcfi_enabled) { + return false; + } + + TCGv dest = dest_gpr(ctx, a->rd); + TCGv src1, src2 = get_gpr(ctx, a->rs2, EXT_NONE); + + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); + src1 = get_address(ctx, a->rs1, 0); + + tcg_gen_atomic_xchg_tl(dest, src1, src2, SS_MMU_INDEX(ctx), + (MO_ALIGN | MO_TESQ)); + gen_set_gpr(ctx, a->rd, dest); + return true; +} diff --git a/target/riscv/insn_trans/trans_svinval.c.inc b/target/riscv/insn_trans/trans_svinval.c.inc index 0f692a1088..a06c3b214f 100644 --- a/target/riscv/insn_trans/trans_svinval.c.inc +++ b/target/riscv/insn_trans/trans_svinval.c.inc @@ -28,7 +28,7 @@ static bool trans_sinval_vma(DisasContext *ctx, arg_sinval_vma *a) /* Do the same as sfence.vma currently */ REQUIRE_EXT(ctx, RVS); #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_tlb_flush(tcg_env); return true; #endif @@ -57,7 +57,7 @@ static bool trans_hinval_vvma(DisasContext *ctx, arg_hinval_vvma *a) /* Do the same as hfence.vvma currently */ REQUIRE_EXT(ctx, RVH); #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_hyp_tlb_flush(tcg_env); return true; #endif @@ -70,7 +70,7 @@ static bool trans_hinval_gvma(DisasContext *ctx, arg_hinval_gvma *a) /* Do the same as hfence.gvma currently */ REQUIRE_EXT(ctx, RVH); #ifndef CONFIG_USER_ONLY - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_hyp_gvma_tlb_flush(tcg_env); return true; #endif diff --git a/target/riscv/internals.h b/target/riscv/internals.h index 0ac17bc5ad..ddbdee885b 100644 --- a/target/riscv/internals.h +++ b/target/riscv/internals.h @@ -30,12 +30,15 @@ * - U+2STAGE 0b100 * - S+2STAGE 0b101 * - S+SUM+2STAGE 0b110 + * - Shadow stack+U 0b1000 + * - Shadow stack+S 0b1001 */ #define MMUIdx_U 0 #define MMUIdx_S 1 #define MMUIdx_S_SUM 2 #define MMUIdx_M 3 #define MMU_2STAGE_BIT (1 << 2) +#define MMU_IDX_SS_WRITE (1 << 3) static inline int mmuidx_priv(int mmu_idx) { diff --git a/target/riscv/kvm/kvm-cpu.c b/target/riscv/kvm/kvm-cpu.c index 8233a32102..cbda4596da 100644 --- a/target/riscv/kvm/kvm-cpu.c +++ b/target/riscv/kvm/kvm-cpu.c @@ -1676,9 +1676,9 @@ void kvm_arch_accel_class_init(ObjectClass *oc) object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia, riscv_set_kvm_aia); 
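For context on the shadow-stack instructions translated above, here is a standalone C sketch of their architectural behaviour on RV64; the state and helper names (ssp, shadow_mem, model_sspush, model_sspopchk) are invented for illustration and are not QEMU APIs. sspush decrements the shadow-stack pointer and stores the link register, while sspopchk loads the top slot, raises a software-check exception on a mismatch, and only advances ssp after a successful compare.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy model state: ssp plays the role of the shadow-stack pointer CSR. */
static uint64_t ssp;
static uint64_t shadow_mem[64];            /* toy backing store, 8-byte slots */
#define SS_SLOT(a) (shadow_mem[(a) / 8])

static void model_sspush(uint64_t link)    /* models sspush x1/x5 on RV64 */
{
    ssp -= 8;                              /* shadow stack grows downwards */
    SS_SLOT(ssp) = link;
}

static void model_sspopchk(uint64_t link)  /* models sspopchk x1/x5 */
{
    if (SS_SLOT(ssp) != link) {            /* mismatch -> software-check trap */
        fprintf(stderr, "shadow stack mismatch: software-check exception\n");
        exit(1);
    }
    ssp += 8;                              /* pop only after a successful check */
}

int main(void)
{
    ssp = sizeof(shadow_mem);              /* start at the top of the toy stack */
    model_sspush(0x1234);                  /* prologue of a protected function */
    model_sspopchk(0x1234);                /* epilogue: return address matches */
    printf("ssp restored to %llu\n", (unsigned long long)ssp);
    return 0;
}

In the translation code itself the same pattern appears, except that the guest memory accesses use SS_MMU_INDEX(), i.e. the current mmu_idx with MMU_IDX_SS_WRITE set, so the MMU treats them as shadow-stack accesses, and the ssp value lives in CPURISCVState.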
object_class_property_set_description(oc, "riscv-aia", - "Set KVM AIA mode. Valid values are " - "emul, hwaccel, and auto. Default " - "is auto."); + "Set KVM AIA mode. Valid values are 'emul', 'hwaccel' and 'auto'. " + "Changing KVM AIA modes relies on host support. Defaults to 'auto' " + "if the host supports it"); object_property_set_default_str(object_class_property_find(oc, "riscv-aia"), "auto"); } @@ -1711,18 +1711,20 @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, error_report("KVM AIA: failed to get current KVM AIA mode"); exit(1); } - qemu_log("KVM AIA: default mode is %s\n", - kvm_aia_mode_str(default_aia_mode)); if (default_aia_mode != aia_mode) { ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, KVM_DEV_RISCV_AIA_CONFIG_MODE, &aia_mode, true, NULL); - if (ret < 0) - warn_report("KVM AIA: failed to set KVM AIA mode"); - else - qemu_log("KVM AIA: set current mode to %s\n", - kvm_aia_mode_str(aia_mode)); + if (ret < 0) { + warn_report("KVM AIA: failed to set KVM AIA mode '%s', using " + "default host mode '%s'", + kvm_aia_mode_str(aia_mode), + kvm_aia_mode_str(default_aia_mode)); + + /* failed to change AIA mode, use default */ + aia_mode = default_aia_mode; + } } ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, diff --git a/target/riscv/machine.c b/target/riscv/machine.c index 492c2c6d9d..99f0af5077 100644 --- a/target/riscv/machine.c +++ b/target/riscv/machine.c @@ -350,6 +350,42 @@ static const VMStateDescription vmstate_jvt = { } }; +static bool elp_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + + return cpu->cfg.ext_zicfilp; +} + +static const VMStateDescription vmstate_elp = { + .name = "cpu/elp", + .version_id = 1, + .minimum_version_id = 1, + .needed = elp_needed, + .fields = (const VMStateField[]) { + VMSTATE_BOOL(env.elp, RISCVCPU), + VMSTATE_END_OF_LIST() + } +}; + +static bool ssp_needed(void *opaque) +{ + RISCVCPU *cpu = opaque; + + return cpu->cfg.ext_zicfiss; +} + +static const VMStateDescription vmstate_ssp = { + .name = "cpu/ssp", + .version_id = 1, + .minimum_version_id = 1, + .needed = ssp_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINTTL(env.ssp, RISCVCPU), + VMSTATE_END_OF_LIST() + } +}; + const VMStateDescription vmstate_riscv_cpu = { .name = "cpu", .version_id = 10, @@ -422,6 +458,8 @@ const VMStateDescription vmstate_riscv_cpu = { &vmstate_debug, &vmstate_smstateen, &vmstate_jvt, + &vmstate_elp, + &vmstate_ssp, NULL } }; diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c index 25a5263573..eddedacf4b 100644 --- a/target/riscv/op_helper.c +++ b/target/riscv/op_helper.c @@ -309,6 +309,15 @@ target_ulong helper_sret(CPURISCVState *env) riscv_cpu_set_mode(env, prev_priv, prev_virt); + /* + * If forward cfi enabled for new priv, restore elp status + * and clear spelp in mstatus + */ + if (cpu_get_fcfien(env)) { + env->elp = get_field(env->mstatus, MSTATUS_SPELP); + } + env->mstatus = set_field(env->mstatus, MSTATUS_SPELP, 0); + return retpc; } @@ -349,6 +358,14 @@ target_ulong helper_mret(CPURISCVState *env) } riscv_cpu_set_mode(env, prev_priv, prev_virt); + /* + * If forward cfi enabled for new priv, restore elp status + * and clear mpelp in mstatus + */ + if (cpu_get_fcfien(env)) { + env->elp = get_field(env->mstatus, MSTATUS_MPELP); + } + env->mstatus = set_field(env->mstatus, MSTATUS_MPELP, 0); return retpc; } diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index 9eea397e72..a1b36664fc 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -326,7 +326,7 @@ bool 
pmp_hart_has_privs(CPURISCVState *env, hwaddr addr, */ pmp_size = -(addr | TARGET_PAGE_MASK); } else { - pmp_size = sizeof(target_ulong); + pmp_size = 2 << riscv_cpu_mxl(env); } } else { pmp_size = size; @@ -598,6 +598,11 @@ void mseccfg_csr_write(CPURISCVState *env, target_ulong val) val &= ~(MSECCFG_MMWP | MSECCFG_MML | MSECCFG_RLB); } + /* M-mode forward cfi to be enabled if cfi extension is implemented */ + if (env_archcpu(env)->cfg.ext_zicfilp) { + val |= (val & MSECCFG_MLPE); + } + env->mseccfg = val; } diff --git a/target/riscv/pmp.h b/target/riscv/pmp.h index f5c10ce85c..e0530a17a3 100644 --- a/target/riscv/pmp.h +++ b/target/riscv/pmp.h @@ -44,7 +44,8 @@ typedef enum { MSECCFG_MMWP = 1 << 1, MSECCFG_RLB = 1 << 2, MSECCFG_USEED = 1 << 8, - MSECCFG_SSEED = 1 << 9 + MSECCFG_SSEED = 1 << 9, + MSECCFG_MLPE = 1 << 10, } mseccfg_field_t; typedef struct { diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c index dea8ab7a43..c62c221696 100644 --- a/target/riscv/tcg/tcg-cpu.c +++ b/target/riscv/tcg/tcg-cpu.c @@ -129,6 +129,7 @@ static void riscv_restore_state_to_opc(CPUState *cs, env->pc = pc; } env->bins = data[1]; + env->excp_uw2 = data[2]; } static const TCGCPUOps riscv_tcg_ops = { @@ -618,11 +619,39 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) cpu->cfg.ext_zihpm = false; } + if (cpu->cfg.ext_zicfiss) { + if (!cpu->cfg.ext_zicsr) { + error_setg(errp, "zicfiss extension requires zicsr extension"); + return; + } + if (!riscv_has_ext(env, RVA)) { + error_setg(errp, "zicfiss extension requires A extension"); + return; + } + if (!riscv_has_ext(env, RVS)) { + error_setg(errp, "zicfiss extension requires S"); + return; + } + if (!cpu->cfg.ext_zimop) { + error_setg(errp, "zicfiss extension requires zimop extension"); + return; + } + if (cpu->cfg.ext_zca && !cpu->cfg.ext_zcmop) { + error_setg(errp, "zicfiss with zca requires zcmop extension"); + return; + } + } + if (!cpu->cfg.ext_zihpm) { cpu->cfg.pmu_mask = 0; cpu->pmu_avail_ctrs = 0; } + if (cpu->cfg.ext_zicfilp && !cpu->cfg.ext_zicsr) { + error_setg(errp, "zicfilp extension requires zicsr extension"); + return; + } + /* * Disable isa extensions based on priv spec after we * validated and set everything we need. diff --git a/target/riscv/translate.c b/target/riscv/translate.c index acba90f170..bccaf8e89a 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -116,6 +116,11 @@ typedef struct DisasContext { bool frm_valid; bool insn_start_updated; const GPtrArray *decoders; + /* zicfilp extension. fcfi_enabled, lp expected or not */ + bool fcfi_enabled; + bool fcfi_lp_expected; + /* zicfiss extension, if shadow stack was enabled during TB gen */ + bool bcfi_enabled; } DisasContext; static inline bool has_ext(DisasContext *ctx, uint32_t ext) @@ -139,6 +144,8 @@ static inline bool has_ext(DisasContext *ctx, uint32_t ext) #define get_address_xl(ctx) ((ctx)->address_xl) #endif +#define mxl_memop(ctx) ((get_xl(ctx) + 1) | MO_TE) + /* The word size for this machine mode. 
*/ static inline int __attribute__((unused)) get_xlen(DisasContext *ctx) { @@ -204,11 +211,12 @@ static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in) tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan); } -static void decode_save_opc(DisasContext *ctx) +static void decode_save_opc(DisasContext *ctx, target_ulong excp_uw2) { assert(!ctx->insn_start_updated); ctx->insn_start_updated = true; tcg_set_insn_start_param(ctx->base.insn_start, 1, ctx->opcode); + tcg_set_insn_start_param(ctx->base.insn_start, 2, excp_uw2); } static void gen_pc_plus_diff(TCGv target, DisasContext *ctx, @@ -694,7 +702,7 @@ static void gen_set_rm(DisasContext *ctx, int rm) } /* The helper may raise ILLEGAL_INSN -- record binv for unwind. */ - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_set_rounding_mode(tcg_env, tcg_constant_i32(rm)); } @@ -707,7 +715,7 @@ static void gen_set_rm_chkfrm(DisasContext *ctx, int rm) ctx->frm_valid = true; /* The helper may raise ILLEGAL_INSN -- record binv for unwind. */ - decode_save_opc(ctx); + decode_save_opc(ctx, 0); gen_helper_set_rounding_mode_chkfrm(tcg_env, tcg_constant_i32(rm)); } @@ -1091,7 +1099,7 @@ static bool gen_amo(DisasContext *ctx, arg_atomic *a, mop |= MO_ALIGN; } - decode_save_opc(ctx); + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); src1 = get_address(ctx, a->rs1, 0); func(dest, src1, src2, ctx->mem_idx, mop); @@ -1105,7 +1113,7 @@ static bool gen_cmpxchg(DisasContext *ctx, arg_atomic *a, MemOp mop) TCGv src1 = get_address(ctx, a->rs1, 0); TCGv src2 = get_gpr(ctx, a->rs2, EXT_NONE); - decode_save_opc(ctx); + decode_save_opc(ctx, RISCV_UW2_ALWAYS_STORE_AMO); tcg_gen_atomic_cmpxchg_tl(dest, src1, dest, src2, ctx->mem_idx, mop); gen_set_gpr(ctx, a->rd, dest); @@ -1121,6 +1129,8 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) return translator_ldl(env, &ctx->base, pc); } +#define SS_MMU_INDEX(ctx) (ctx->mem_idx | MMU_IDX_SS_WRITE) + /* Include insn module translation function */ #include "insn_trans/trans_rvi.c.inc" #include "insn_trans/trans_rvm.c.inc" @@ -1151,6 +1161,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) #include "decode-insn16.c.inc" #include "insn_trans/trans_rvzce.c.inc" #include "insn_trans/trans_rvzcmop.c.inc" +#include "insn_trans/trans_rvzicfiss.c.inc" /* Include decoders for factored-out extensions */ #include "decode-XVentanaCondOps.c.inc" @@ -1238,6 +1249,9 @@ static void riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs) ctx->pm_base_enabled = FIELD_EX32(tb_flags, TB_FLAGS, PM_BASE_ENABLED); ctx->ztso = cpu->cfg.ext_ztso; ctx->itrigger = FIELD_EX32(tb_flags, TB_FLAGS, ITRIGGER); + ctx->bcfi_enabled = FIELD_EX32(tb_flags, TB_FLAGS, BCFI_ENABLED); + ctx->fcfi_lp_expected = FIELD_EX32(tb_flags, TB_FLAGS, FCFI_LP_EXPECTED); + ctx->fcfi_enabled = FIELD_EX32(tb_flags, TB_FLAGS, FCFI_ENABLED); ctx->zero = tcg_constant_tl(0); ctx->virt_inst_excp = false; ctx->decoders = cpu->decoders; @@ -1256,7 +1270,7 @@ static void riscv_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) pc_next &= ~TARGET_PAGE_MASK; } - tcg_gen_insn_start(pc_next, 0); + tcg_gen_insn_start(pc_next, 0, 0); ctx->insn_start_updated = false; } @@ -1270,6 +1284,24 @@ static void riscv_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) decode_opc(env, ctx, opcode16); ctx->base.pc_next += ctx->cur_insn_len; + /* + * If 'fcfi_lp_expected' is still true after processing the instruction, + * then we did not see an 'lpad' instruction, and must raise an exception. 
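As background for this check, a minimal standalone C model of the Zicfilp rule follows (simplified; the names elp, indirect_call and execute are illustrative only and the label comparison is reduced to a single integer): an indirect transfer arms an "expected landing pad" state, and the next instruction executed must be an lpad whose label is zero or matches the label carried by the caller, otherwise a software-check exception is raised.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy model of the Zicfilp "expected landing pad" state; not QEMU code. */
static bool elp;            /* set after an indirect call/jump           */
static uint32_t x7_label;   /* label the caller places in x7 (t2)        */

static void indirect_call(uint32_t label)
{
    x7_label = label;
    elp = true;             /* the very next instruction must be an lpad */
}

static void execute(bool is_lpad, uint32_t lpad_label)
{
    if (elp) {
        /* Simplified rule: label 0 matches anything, otherwise compare. */
        if (!is_lpad || (lpad_label != 0 && lpad_label != x7_label)) {
            fprintf(stderr, "FCFI violation: missing or mismatched lpad\n");
            exit(1);
        }
        elp = false;        /* the landing pad satisfied the expectation */
    }
    /* ... normal execution of the instruction ... */
}

int main(void)
{
    indirect_call(42);
    execute(true, 42);      /* valid landing pad, label matches          */
    indirect_call(42);
    execute(false, 0);      /* no lpad after the indirect jump -> trap   */
    return 0;
}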
+ * Insert code to raise the exception at the start of the insn; any other + * code the insn may have emitted will be deleted as dead code following + * the noreturn exception + */ + if (ctx->fcfi_lp_expected) { + /* Emit after insn_start, i.e. before the op following insn_start. */ + tcg_ctx->emit_before_op = QTAILQ_NEXT(ctx->base.insn_start, link); + tcg_gen_st_tl(tcg_constant_tl(RISCV_EXCP_SW_CHECK_FCFI_TVAL), + tcg_env, offsetof(CPURISCVState, sw_check_code)); + gen_helper_raise_exception(tcg_env, + tcg_constant_i32(RISCV_EXCP_SW_CHECK)); + tcg_ctx->emit_before_op = NULL; + ctx->base.is_jmp = DISAS_NORETURN; + } + /* Only the first insn within a TB is allowed to cross a page boundary. */ if (ctx->base.is_jmp == DISAS_NEXT) { if (ctx->itrigger || !is_same_page(&ctx->base, ctx->base.pc_next)) { diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 072bd444b1..ccb32e6122 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -5132,7 +5132,7 @@ void HELPER(NAME)(void *vd, void *v0, void *vs1, void *vs2, \ } \ env->vstart = 0; \ /* set tail elements to 1s */ \ - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); \ + vext_set_elems_1s(vd, vta, num * esz, total_elems * esz); \ } /* Compress into vd elements of vs2 where vs1 is enabled */ diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py index 38064840da..366c262e32 100644 --- a/tests/avocado/tuxrun_baselines.py +++ b/tests/avocado/tuxrun_baselines.py @@ -222,3 +222,19 @@ class TuxRunBaselineTest(QemuSystemTest): "rootfs.ext4.zst" : "e6ffd8813c8a335bc15728f2835f90539c84be7f8f5f691a8b01451b47fb4bd7"} self.common_tuxrun(csums=sums) + + def test_riscv64_rv32(self): + """ + :avocado: tags=arch:riscv64 + :avocado: tags=machine:virt + :avocado: tags=tuxboot:riscv32 + :avocado: tags=cpu:rv32 + """ + sums = { "Image" : + "89599407d7334de629a40e7ad6503c73670359eb5f5ae9d686353a3d6deccbd5", + "fw_jump.elf" : + "f2ef28a0b77826f79d085d3e4aa686f1159b315eff9099a37046b18936676985", + "rootfs.ext4.zst" : + "7168d296d0283238ea73cd5a775b3dd608e55e04c7b92b76ecce31bb13108cba" } + + self.common_tuxrun(csums=sums) diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build index 270439c2df..46f130ccfd 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -67,6 +67,10 @@ if have_virtfs libqos_srcs += files('virtio-9p.c', 'virtio-9p-client.c') endif +if config_all_devices.has_key('CONFIG_RISCV_IOMMU') + libqos_srcs += files('riscv-iommu.c') +endif + libqos = static_library('qos', libqos_srcs + genh, build_by_default: false) diff --git a/tests/qtest/libqos/riscv-iommu.c b/tests/qtest/libqos/riscv-iommu.c new file mode 100644 index 0000000000..01e3b31c0b --- /dev/null +++ b/tests/qtest/libqos/riscv-iommu.c @@ -0,0 +1,76 @@ +/* + * libqos driver riscv-iommu-pci framework + * + * Copyright (c) 2024 Ventana Micro Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "../libqtest.h" +#include "qemu/module.h" +#include "qgraph.h" +#include "pci.h" +#include "riscv-iommu.h" + +static void *riscv_iommu_pci_get_driver(void *obj, const char *interface) +{ + QRISCVIOMMU *r_iommu_pci = obj; + + if (!g_strcmp0(interface, "pci-device")) { + return &r_iommu_pci->dev; + } + + fprintf(stderr, "%s not present in riscv_iommu_pci\n", interface); + g_assert_not_reached(); +} + +static void riscv_iommu_pci_start_hw(QOSGraphObject *obj) +{ + QRISCVIOMMU *pci = (QRISCVIOMMU *)obj; + qpci_device_enable(&pci->dev); +} + +static void riscv_iommu_pci_destructor(QOSGraphObject *obj) +{ + QRISCVIOMMU *pci = (QRISCVIOMMU *)obj; + qpci_iounmap(&pci->dev, pci->reg_bar); +} + +static void *riscv_iommu_pci_create(void *pci_bus, QGuestAllocator *alloc, + void *addr) +{ + QRISCVIOMMU *r_iommu_pci = g_new0(QRISCVIOMMU, 1); + QPCIBus *bus = pci_bus; + + qpci_device_init(&r_iommu_pci->dev, bus, addr); + r_iommu_pci->reg_bar = qpci_iomap(&r_iommu_pci->dev, 0, NULL); + + r_iommu_pci->obj.get_driver = riscv_iommu_pci_get_driver; + r_iommu_pci->obj.start_hw = riscv_iommu_pci_start_hw; + r_iommu_pci->obj.destructor = riscv_iommu_pci_destructor; + return &r_iommu_pci->obj; +} + +static void riscv_iommu_pci_register_nodes(void) +{ + QPCIAddress addr = { + .vendor_id = RISCV_IOMMU_PCI_VENDOR_ID, + .device_id = RISCV_IOMMU_PCI_DEVICE_ID, + .devfn = QPCI_DEVFN(1, 0), + }; + + QOSGraphEdgeOptions opts = { + .extra_device_opts = "addr=01.0", + }; + + add_qpci_address(&opts, &addr); + + qos_node_create_driver("riscv-iommu-pci", riscv_iommu_pci_create); + qos_node_produces("riscv-iommu-pci", "pci-device"); + qos_node_consumes("riscv-iommu-pci", "pci-bus", &opts); +} + +libqos_init(riscv_iommu_pci_register_nodes); diff --git a/tests/qtest/libqos/riscv-iommu.h b/tests/qtest/libqos/riscv-iommu.h new file mode 100644 index 0000000000..318db13799 --- /dev/null +++ b/tests/qtest/libqos/riscv-iommu.h @@ -0,0 +1,101 @@ +/* + * libqos driver riscv-iommu-pci framework + * + * Copyright (c) 2024 Ventana Micro Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. + * + */ + +#ifndef TESTS_LIBQOS_RISCV_IOMMU_H +#define TESTS_LIBQOS_RISCV_IOMMU_H + +#include "qgraph.h" +#include "pci.h" +#include "qemu/bitops.h" + +#ifndef GENMASK_ULL +#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l)) +#endif + +/* + * RISC-V IOMMU uses PCI_VENDOR_ID_REDHAT 0x1b36 and + * PCI_DEVICE_ID_REDHAT_RISCV_IOMMU 0x0014. 
+ */ +#define RISCV_IOMMU_PCI_VENDOR_ID 0x1b36 +#define RISCV_IOMMU_PCI_DEVICE_ID 0x0014 +#define RISCV_IOMMU_PCI_DEVICE_CLASS 0x0806 + +/* Common field positions */ +#define RISCV_IOMMU_QUEUE_ENABLE BIT(0) +#define RISCV_IOMMU_QUEUE_INTR_ENABLE BIT(1) +#define RISCV_IOMMU_QUEUE_MEM_FAULT BIT(8) +#define RISCV_IOMMU_QUEUE_ACTIVE BIT(16) +#define RISCV_IOMMU_QUEUE_BUSY BIT(17) + +#define RISCV_IOMMU_REG_CAP 0x0000 +#define RISCV_IOMMU_CAP_VERSION GENMASK_ULL(7, 0) + +#define RISCV_IOMMU_REG_DDTP 0x0010 +#define RISCV_IOMMU_DDTP_BUSY BIT_ULL(4) +#define RISCV_IOMMU_DDTP_MODE GENMASK_ULL(3, 0) +#define RISCV_IOMMU_DDTP_MODE_OFF 0 + +#define RISCV_IOMMU_REG_CQCSR 0x0048 +#define RISCV_IOMMU_CQCSR_CQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_CQCSR_CIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_CQCSR_CQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_CQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +#define RISCV_IOMMU_REG_FQCSR 0x004C +#define RISCV_IOMMU_FQCSR_FQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_FQCSR_FIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_FQCSR_FQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_FQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +#define RISCV_IOMMU_REG_PQCSR 0x0050 +#define RISCV_IOMMU_PQCSR_PQEN RISCV_IOMMU_QUEUE_ENABLE +#define RISCV_IOMMU_PQCSR_PIE RISCV_IOMMU_QUEUE_INTR_ENABLE +#define RISCV_IOMMU_PQCSR_PQON RISCV_IOMMU_QUEUE_ACTIVE +#define RISCV_IOMMU_PQCSR_BUSY RISCV_IOMMU_QUEUE_BUSY + +#define RISCV_IOMMU_REG_IPSR 0x0054 + +#define RISCV_IOMMU_REG_IVEC 0x02F8 +#define RISCV_IOMMU_REG_IVEC_CIV GENMASK_ULL(3, 0) +#define RISCV_IOMMU_REG_IVEC_FIV GENMASK_ULL(7, 4) +#define RISCV_IOMMU_REG_IVEC_PMIV GENMASK_ULL(11, 8) +#define RISCV_IOMMU_REG_IVEC_PIV GENMASK_ULL(15, 12) + +#define RISCV_IOMMU_REG_CQB 0x0018 +#define RISCV_IOMMU_CQB_PPN_START 10 +#define RISCV_IOMMU_CQB_PPN_LEN 44 +#define RISCV_IOMMU_CQB_LOG2SZ_START 0 +#define RISCV_IOMMU_CQB_LOG2SZ_LEN 5 + +#define RISCV_IOMMU_REG_CQT 0x0024 + +#define RISCV_IOMMU_REG_FQB 0x0028 +#define RISCV_IOMMU_FQB_PPN_START 10 +#define RISCV_IOMMU_FQB_PPN_LEN 44 +#define RISCV_IOMMU_FQB_LOG2SZ_START 0 +#define RISCV_IOMMU_FQB_LOG2SZ_LEN 5 + +#define RISCV_IOMMU_REG_FQT 0x0034 + +#define RISCV_IOMMU_REG_PQB 0x0038 +#define RISCV_IOMMU_PQB_PPN_START 10 +#define RISCV_IOMMU_PQB_PPN_LEN 44 +#define RISCV_IOMMU_PQB_LOG2SZ_START 0 +#define RISCV_IOMMU_PQB_LOG2SZ_LEN 5 + +#define RISCV_IOMMU_REG_PQT 0x0044 + +typedef struct QRISCVIOMMU { + QOSGraphObject obj; + QPCIDevice dev; + QPCIBar reg_bar; +} QRISCVIOMMU; + +#endif diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index f7a19032f7..9d51114539 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -305,6 +305,7 @@ qos_test_ss.add( 'vmxnet3-test.c', 'igb-test.c', 'ufs-test.c', + 'riscv-iommu-test.c', ) if config_all_devices.has_key('CONFIG_VIRTIO_SERIAL') diff --git a/tests/qtest/riscv-iommu-test.c b/tests/qtest/riscv-iommu-test.c new file mode 100644 index 0000000000..df0c7813ec --- /dev/null +++ b/tests/qtest/riscv-iommu-test.c @@ -0,0 +1,210 @@ +/* + * QTest testcase for RISC-V IOMMU + * + * Copyright (c) 2024 Ventana Micro Systems Inc. + * + * This work is licensed under the terms of the GNU GPL, version 2 or (at your + * option) any later version. See the COPYING file in the top-level directory. 
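The GENMASK_ULL() fallback defined above builds a contiguous bit mask covering bits l..h inclusive. A small self-checking example (plain C, with field positions taken from the register layout above and a hypothetical CAP value) shows how the qtest extracts fields such as CAP.version from a register read.

#include <assert.h>
#include <stdint.h>

#ifndef GENMASK_ULL
#define GENMASK_ULL(h, l) (((~0ULL) >> (63 - (h) + (l))) << (l))
#endif

int main(void)
{
    /* GENMASK_ULL(h, l) sets bits l..h inclusive. */
    assert(GENMASK_ULL(7, 0)   == 0xffULL);     /* CAP.version field */
    assert(GENMASK_ULL(3, 0)   == 0x0fULL);     /* DDTP.mode field   */
    assert(GENMASK_ULL(15, 12) == 0xf000ULL);   /* IVEC.piv field    */

    /* Extracting a field: mask it out (here the LSB is bit 0, so no shift). */
    uint64_t cap = 0x12345610ULL;               /* hypothetical CAP value */
    uint64_t version = cap & GENMASK_ULL(7, 0);
    assert(version == 0x10);                    /* the value the qtest expects */
    return 0;
}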
+ * + */ + +#include "qemu/osdep.h" +#include "libqtest-single.h" +#include "qemu/module.h" +#include "libqos/qgraph.h" +#include "libqos/riscv-iommu.h" +#include "hw/pci/pci_regs.h" + +static uint32_t riscv_iommu_read_reg32(QRISCVIOMMU *r_iommu, int reg_offset) +{ + return qpci_io_readl(&r_iommu->dev, r_iommu->reg_bar, reg_offset); +} + +static uint64_t riscv_iommu_read_reg64(QRISCVIOMMU *r_iommu, int reg_offset) +{ + return qpci_io_readq(&r_iommu->dev, r_iommu->reg_bar, reg_offset); +} + +static void riscv_iommu_write_reg32(QRISCVIOMMU *r_iommu, int reg_offset, + uint32_t val) +{ + qpci_io_writel(&r_iommu->dev, r_iommu->reg_bar, reg_offset, val); +} + +static void riscv_iommu_write_reg64(QRISCVIOMMU *r_iommu, int reg_offset, + uint64_t val) +{ + qpci_io_writeq(&r_iommu->dev, r_iommu->reg_bar, reg_offset, val); +} + +static void test_pci_config(void *obj, void *data, QGuestAllocator *t_alloc) +{ + QRISCVIOMMU *r_iommu = obj; + QPCIDevice *dev = &r_iommu->dev; + uint16_t vendorid, deviceid, classid; + + vendorid = qpci_config_readw(dev, PCI_VENDOR_ID); + deviceid = qpci_config_readw(dev, PCI_DEVICE_ID); + classid = qpci_config_readw(dev, PCI_CLASS_DEVICE); + + g_assert_cmpuint(vendorid, ==, RISCV_IOMMU_PCI_VENDOR_ID); + g_assert_cmpuint(deviceid, ==, RISCV_IOMMU_PCI_DEVICE_ID); + g_assert_cmpuint(classid, ==, RISCV_IOMMU_PCI_DEVICE_CLASS); +} + +static void test_reg_reset(void *obj, void *data, QGuestAllocator *t_alloc) +{ + QRISCVIOMMU *r_iommu = obj; + uint64_t cap; + uint32_t reg; + + cap = riscv_iommu_read_reg64(r_iommu, RISCV_IOMMU_REG_CAP); + g_assert_cmpuint(cap & RISCV_IOMMU_CAP_VERSION, ==, 0x10); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_CQCSR); + g_assert_cmpuint(reg & RISCV_IOMMU_CQCSR_CQEN, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_CQCSR_CIE, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_CQCSR_CQON, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_CQCSR_BUSY, ==, 0); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_FQCSR); + g_assert_cmpuint(reg & RISCV_IOMMU_FQCSR_FQEN, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_FQCSR_FIE, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_FQCSR_FQON, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_FQCSR_BUSY, ==, 0); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_PQCSR); + g_assert_cmpuint(reg & RISCV_IOMMU_PQCSR_PQEN, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_PQCSR_PIE, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_PQCSR_PQON, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_PQCSR_BUSY, ==, 0); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_DDTP); + g_assert_cmpuint(reg & RISCV_IOMMU_DDTP_BUSY, ==, 0); + g_assert_cmpuint(reg & RISCV_IOMMU_DDTP_MODE, ==, + RISCV_IOMMU_DDTP_MODE_OFF); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_IPSR); + g_assert_cmpuint(reg, ==, 0); +} + +/* + * Common timeout-based poll for CQCSR, FQCSR and PQCSR. All + * their ON bits are mapped as RISCV_IOMMU_QUEUE_ACTIVE (16), + */ +static void qtest_wait_for_queue_active(QRISCVIOMMU *r_iommu, + uint32_t queue_csr) +{ + QTestState *qts = global_qtest; + guint64 timeout_us = 2 * 1000 * 1000; + gint64 start_time = g_get_monotonic_time(); + uint32_t reg; + + for (;;) { + qtest_clock_step(qts, 100); + + reg = riscv_iommu_read_reg32(r_iommu, queue_csr); + if (reg & RISCV_IOMMU_QUEUE_ACTIVE) { + break; + } + g_assert(g_get_monotonic_time() - start_time <= timeout_us); + } +} + +/* + * Goes through the queue activation procedures of chapter 6.2, + * "Guidelines for initialization", of the RISCV-IOMMU spec. 
+ */ +static void test_iommu_init_queues(void *obj, void *data, + QGuestAllocator *t_alloc) +{ + QRISCVIOMMU *r_iommu = obj; + uint64_t reg64, q_addr; + uint32_t reg; + int k = 2; + + reg64 = riscv_iommu_read_reg64(r_iommu, RISCV_IOMMU_REG_CAP); + g_assert_cmpuint(reg64 & RISCV_IOMMU_CAP_VERSION, ==, 0x10); + + /* + * Program the command queue. Write 0xF to civ, fiv, pmiv and + * piv. With the current PCI device impl we expect 2 writable + * bits for each (k = 2) since we have N = 4 total vectors (2^k). + */ + riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_IVEC, 0xFFFF); + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_IVEC); + g_assert_cmpuint(reg & RISCV_IOMMU_REG_IVEC_CIV, ==, 0x3); + g_assert_cmpuint(reg & RISCV_IOMMU_REG_IVEC_FIV, ==, 0x30); + g_assert_cmpuint(reg & RISCV_IOMMU_REG_IVEC_PMIV, ==, 0x300); + g_assert_cmpuint(reg & RISCV_IOMMU_REG_IVEC_PIV, ==, 0x3000); + + /* Alloc a 4*16 bytes buffer and use it to set cqb */ + q_addr = guest_alloc(t_alloc, 4 * 16); + reg64 = 0; + deposit64(reg64, RISCV_IOMMU_CQB_PPN_START, + RISCV_IOMMU_CQB_PPN_LEN, q_addr); + deposit64(reg64, RISCV_IOMMU_CQB_LOG2SZ_START, + RISCV_IOMMU_CQB_LOG2SZ_LEN, k - 1); + riscv_iommu_write_reg64(r_iommu, RISCV_IOMMU_REG_CQB, reg64); + + /* cqt = 0, cqcsr.cqen = 1, poll cqcsr.cqon until it reads 1 */ + riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_CQT, 0); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_CQCSR); + reg |= RISCV_IOMMU_CQCSR_CQEN; + riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_CQCSR, reg); + + qtest_wait_for_queue_active(r_iommu, RISCV_IOMMU_REG_CQCSR); + + /* + * Program the fault queue. Alloc a 4*32 bytes (instead of 4*16) + * buffer and use it to set fqb. + */ + q_addr = guest_alloc(t_alloc, 4 * 32); + reg64 = 0; + deposit64(reg64, RISCV_IOMMU_FQB_PPN_START, + RISCV_IOMMU_FQB_PPN_LEN, q_addr); + deposit64(reg64, RISCV_IOMMU_FQB_LOG2SZ_START, + RISCV_IOMMU_FQB_LOG2SZ_LEN, k - 1); + riscv_iommu_write_reg64(r_iommu, RISCV_IOMMU_REG_FQB, reg64); + + /* fqt = 0, fqcsr.fqen = 1, poll fqcsr.fqon until it reads 1 */ + riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_FQT, 0); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_FQCSR); + reg |= RISCV_IOMMU_FQCSR_FQEN; + riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_FQCSR, reg); + + qtest_wait_for_queue_active(r_iommu, RISCV_IOMMU_REG_FQCSR); + + /* + * Program the page-request queue. Alloc a 4*16 bytes buffer + * and use it to set pqb. + */ + q_addr = guest_alloc(t_alloc, 4 * 16); + reg64 = 0; + deposit64(reg64, RISCV_IOMMU_PQB_PPN_START, + RISCV_IOMMU_PQB_PPN_LEN, q_addr); + deposit64(reg64, RISCV_IOMMU_PQB_LOG2SZ_START, + RISCV_IOMMU_PQB_LOG2SZ_LEN, k - 1); + riscv_iommu_write_reg64(r_iommu, RISCV_IOMMU_REG_PQB, reg64); + + /* pqt = 0, pqcsr.pqen = 1, poll pqcsr.pqon until it reads 1 */ + riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_PQT, 0); + + reg = riscv_iommu_read_reg32(r_iommu, RISCV_IOMMU_REG_PQCSR); + reg |= RISCV_IOMMU_PQCSR_PQEN; + riscv_iommu_write_reg32(r_iommu, RISCV_IOMMU_REG_PQCSR, reg); + + qtest_wait_for_queue_active(r_iommu, RISCV_IOMMU_REG_PQCSR); +} + +static void register_riscv_iommu_test(void) +{ + qos_add_test("pci_config", "riscv-iommu-pci", test_pci_config, NULL); + qos_add_test("reg_reset", "riscv-iommu-pci", test_reg_reset, NULL); + qos_add_test("iommu_init_queues", "riscv-iommu-pci", + test_iommu_init_queues, NULL); +} + +libqos_init(register_riscv_iommu_test);
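One detail worth illustrating for readers of the queue-programming test above: QEMU's deposit64() (declared in include/qemu/bitops.h) returns the combined value rather than modifying its first argument, so code composing a queue base register normally assigns the result back. The sketch below uses a simplified stand-in with the same behaviour (no bounds assertions) and hypothetical values; the field offsets are the CQB ones from riscv-iommu.h.

#include <assert.h>
#include <stdint.h>

/*
 * Simplified stand-in for QEMU's deposit64(): returns a copy of @value with
 * @length bits starting at @start replaced by @fieldval; @value is untouched.
 */
static uint64_t deposit64(uint64_t value, int start, int length,
                          uint64_t fieldval)
{
    uint64_t mask = ((~0ULL) >> (64 - length)) << start;
    return (value & ~mask) | ((fieldval << start) & mask);
}

int main(void)
{
    /* CQB field layout, as in riscv-iommu.h above. */
    enum { CQB_PPN_START = 10, CQB_PPN_LEN = 44,
           CQB_LOG2SZ_START = 0, CQB_LOG2SZ_LEN = 5 };

    uint64_t q_addr = 0x1234000;   /* hypothetical queue base value */
    int k = 2;                     /* 2^k = 4 queue entries         */

    /* The result must be assigned back, since the input is not modified. */
    uint64_t cqb = 0;
    cqb = deposit64(cqb, CQB_PPN_START, CQB_PPN_LEN, q_addr);
    cqb = deposit64(cqb, CQB_LOG2SZ_START, CQB_LOG2SZ_LEN, k - 1);

    assert((cqb & 0x1f) == 1);                              /* LOG2SZ-1 field */
    assert(((cqb >> 10) & ((1ULL << 44) - 1)) == q_addr);   /* PPN field      */
    return 0;
}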