qemu/target/mips/tcg/mxu_translate.c
Michael Tokarev d5c9fa4708 hw/mips: spelling fixes
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20230823065335.1919380-7-mjt@tls.msk.ru>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
2023-08-31 19:47:43 +02:00

5111 lines
156 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Ingenic XBurst Media eXtension Unit (MXU) translation routines.
*
* Copyright (c) 2004-2005 Jocelyn Mayer
* Copyright (c) 2006 Marius Groeger (FPU operations)
* Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
* Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
* Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
* Datasheet:
*
* "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
* Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
*/
#include "qemu/osdep.h"
#include "translate.h"
/*
*
* AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
* ============================================
*
*
* MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32
* instructions set. It is designed to fit the needs of signal, graphical and
* video processing applications. MXU instruction set is used in Xburst family
* of microprocessors by Ingenic.
*
* MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
* the control register.
*
*
* The notation used in MXU assembler mnemonics
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* Register operands:
*
* XRa, XRb, XRc, XRd - MXU registers
* Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
*
* Non-register operands:
*
* aptn1 - 1-bit accumulate add/subtract pattern
* aptn2 - 2-bit accumulate add/subtract pattern
* eptn2 - 2-bit execute add/subtract pattern
* optn2 - 2-bit operand pattern
* optn3 - 3-bit operand pattern
* sft4 - 4-bit shift amount
* strd2 - 2-bit stride amount
*
* Prefixes:
*
* Level of parallelism: Operand size:
* S - single operation at a time 32 - word
* D - two operations in parallel 16 - half word
* Q - four operations in parallel 8 - byte
*
* Operations:
*
* ADD - Add or subtract
* ADDC - Add with carry-in
* ACC - Accumulate
* ASUM - Sum together then accumulate (add or subtract)
* ASUMC - Sum together then accumulate (add or subtract) with carry-in
* AVG - Average between 2 operands
* ABD - Absolute difference
* ALN - Align data
* AND - Logical bitwise 'and' operation
* CPS - Copy sign
* EXTR - Extract bits
* I2M - Move from GPR register to MXU register
* LDD - Load data from memory to XRF
* LDI - Load data from memory to XRF (and increase the address base)
* LUI - Load unsigned immediate
* MUL - Multiply
* MULU - Unsigned multiply
* MADD - 64-bit operand add 32x32 product
* MSUB - 64-bit operand subtract 32x32 product
* MAC - Multiply and accumulate (add or subtract)
* MAD - Multiply and add or subtract
* MAX - Maximum between 2 operands
* MIN - Minimum between 2 operands
* M2I - Move from MXU register to GPR register
* MOVZ - Move if zero
* MOVN - Move if non-zero
* NOR - Logical bitwise 'nor' operation
* OR - Logical bitwise 'or' operation
* STD - Store data from XRF to memory
* SDI - Store data from XRF to memory (and increase the address base)
* SLT - Set of less than comparison
* SAD - Sum of absolute differences
* SLL - Logical shift left
* SLR - Logical shift right
* SAR - Arithmetic shift right
* SAT - Saturation
* SFL - Shuffle
* SCOP - Calculate xs scope (-1, means x<0; 0, means x==0; 1, means x>0)
* XOR - Logical bitwise 'exclusive or' operation
*
* Suffixes:
*
* E - Expand results
* F - Fixed point multiplication
* L - Low part result
* R - Doing rounding
* V - Variable instead of immediate
* W - Combine above L and V
*
*
* The list of MXU instructions grouped by functionality
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* Load/Store instructions Multiplication instructions
* ----------------------- ---------------------------
*
* S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt
* S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt
* S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt
* S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt
* S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt
* S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt
* S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2
* S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2
* S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2
* S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
* S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
* S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
* S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
* S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
* S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd
* S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd
* S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2
* S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2
* S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2
* S16SDI XRa, Rb, s10, eptn2
* S8LDD XRa, Rb, s8, eptn3
* S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions
* S8LDI XRa, Rb, s8, eptn3 -------------------------------------
* S8SDI XRa, Rb, s8, eptn3
* LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2
* LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd
* LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2
* LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2
* LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2
* S32CPS XRa, XRb, XRc
* Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
* Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2
* ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2
* D16ASUM XRa, XRb, XRc, XRd, eptn2
* S32MAX XRa, XRb, XRc D16CPS XRa, XRb,
* S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc
* S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc
* S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2
* S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2
* D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2
* D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc
* D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd
* D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc
* D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc
* Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd
* Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd
* Q8SLT XRa, XRb, XRc
* Q8SLTU XRa, XRb, XRc
* Q8MOVZ XRa, XRb, XRc Shift instructions
* Q8MOVN XRa, XRb, XRc ------------------
*
* D32SLL XRa, XRb, XRc, XRd, sft4
* Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4
* -------------------- D32SAR XRa, XRb, XRc, XRd, sft4
* D32SARL XRa, XRb, XRc, sft4
* S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb
* S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb
* S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb
* S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb
* Q16SLL XRa, XRb, XRc, XRd, sft4
* Q16SLR XRa, XRb, XRc, XRd, sft4
* Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4
* ------------------------- Q16SLLV XRa, XRb, Rb
* Q16SLRV XRa, XRb, Rb
* S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb
* S32ALN XRa, XRb, XRc, Rb
* S32ALNI XRa, XRb, XRc, s3
* S32LUI XRa, s8, optn3 Move instructions
* S32EXTR XRa, XRb, Rb, bits5 -----------------
* S32EXTRV XRa, XRb, Rs, Rt
* Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb
* Q16SAT XRa, XRb, XRc S32I2M XRa, Rb
*
*
* The opcode organization of MXU instructions
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
* as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
* other bits up to the instruction level is as follows:
*
* bits
* 05..00
*
* ┌─ 000000 ─ OPC_MXU_S32MADD
* ├─ 000001 ─ OPC_MXU_S32MADDU
* ├─ 000010 ─ <not assigned> (non-MXU OPC_MUL)
* │
* │ 20..18
* ├─ 000011 ─ OPC_MXU__POOL00 ─┬─ 000 ─ OPC_MXU_S32MAX
* │ ├─ 001 ─ OPC_MXU_S32MIN
* │ ├─ 010 ─ OPC_MXU_D16MAX
* │ ├─ 011 ─ OPC_MXU_D16MIN
* │ ├─ 100 ─ OPC_MXU_Q8MAX
* │ ├─ 101 ─ OPC_MXU_Q8MIN
* │ ├─ 110 ─ OPC_MXU_Q8SLT
* │ └─ 111 ─ OPC_MXU_Q8SLTU
* ├─ 000100 ─ OPC_MXU_S32MSUB
* ├─ 000101 ─ OPC_MXU_S32MSUBU 20..18
* ├─ 000110 ─ OPC_MXU__POOL01 ─┬─ 000 ─ OPC_MXU_S32SLT
* │ ├─ 001 ─ OPC_MXU_D16SLT
* │ ├─ 010 ─ OPC_MXU_D16AVG
* │ ├─ 011 ─ OPC_MXU_D16AVGR
* │ ├─ 100 ─ OPC_MXU_Q8AVG
* │ ├─ 101 ─ OPC_MXU_Q8AVGR
* │ └─ 111 ─ OPC_MXU_Q8ADD
* │
* │ 20..18
* ├─ 000111 ─ OPC_MXU__POOL02 ─┬─ 000 ─ OPC_MXU_S32CPS
* │ ├─ 010 ─ OPC_MXU_D16CPS
* │ ├─ 100 ─ OPC_MXU_Q8ABD
* │ └─ 110 ─ OPC_MXU_Q16SAT
* ├─ 001000 ─ OPC_MXU_D16MUL
* │ 25..24
* ├─ 001001 ─ OPC_MXU__POOL03 ─┬─ 00 ─ OPC_MXU_D16MULF
* │ └─ 01 ─ OPC_MXU_D16MULE
* ├─ 001010 ─ OPC_MXU_D16MAC
* ├─ 001011 ─ OPC_MXU_D16MACF
* ├─ 001100 ─ OPC_MXU_D16MADL
* ├─ 001101 ─ OPC_MXU_S16MAD
* ├─ 001110 ─ OPC_MXU_Q16ADD
* ├─ 001111 ─ OPC_MXU_D16MACE 20 (13..10 don't care)
* │ ┌─ 0 ─ OPC_MXU_S32LDD
* ├─ 010000 ─ OPC_MXU__POOL04 ─┴─ 1 ─ OPC_MXU_S32LDDR
* │
* │ 20 (13..10 don't care)
* ├─ 010001 ─ OPC_MXU__POOL05 ─┬─ 0 ─ OPC_MXU_S32STD
* │ └─ 1 ─ OPC_MXU_S32STDR
* │
* │ 13..10
* ├─ 010010 ─ OPC_MXU__POOL06 ─┬─ 0000 ─ OPC_MXU_S32LDDV
* │ └─ 0001 ─ OPC_MXU_S32LDDVR
* │
* │ 13..10
* ├─ 010011 ─ OPC_MXU__POOL07 ─┬─ 0000 ─ OPC_MXU_S32STDV
* │ └─ 0001 ─ OPC_MXU_S32STDVR
* │
* │ 20 (13..10 don't care)
* ├─ 010100 ─ OPC_MXU__POOL08 ─┬─ 0 ─ OPC_MXU_S32LDI
* │ └─ 1 ─ OPC_MXU_S32LDIR
* │
* │ 20 (13..10 don't care)
* ├─ 010101 ─ OPC_MXU__POOL09 ─┬─ 0 ─ OPC_MXU_S32SDI
* │ └─ 1 ─ OPC_MXU_S32SDIR
* │
* │ 13..10
* ├─ 010110 ─ OPC_MXU__POOL10 ─┬─ 0000 ─ OPC_MXU_S32LDIV
* │ └─ 0001 ─ OPC_MXU_S32LDIVR
* │
* │ 13..10
* ├─ 010111 ─ OPC_MXU__POOL11 ─┬─ 0000 ─ OPC_MXU_S32SDIV
* │ └─ 0001 ─ OPC_MXU_S32SDIVR
* ├─ 011000 ─ OPC_MXU_D32ADD (catches D32ADDC too)
* │ 23..22
* MXU ├─ 011001 ─ OPC_MXU__POOL12 ─┬─ 00 ─ OPC_MXU_D32ACC
* opcodes ─┤ ├─ 01 ─ OPC_MXU_D32ACCM
* │ └─ 10 ─ OPC_MXU_D32ASUM
* ├─ 011010 ─ <not assigned>
* │ 23..22
* ├─ 011011 ─ OPC_MXU__POOL13 ─┬─ 00 ─ OPC_MXU_Q16ACC
* │ ├─ 01 ─ OPC_MXU_Q16ACCM
* │ └─ 10 ─ OPC_MXU_D16ASUM
* │
* │ 23..22
* ├─ 011100 ─ OPC_MXU__POOL14 ─┬─ 00 ─ OPC_MXU_Q8ADDE
* │ ├─ 01 ─ OPC_MXU_D8SUM
* ├─ 011101 ─ OPC_MXU_Q8ACCE └─ 10 ─ OPC_MXU_D8SUMC
* ├─ 011110 ─ <not assigned>
* ├─ 011111 ─ <not assigned>
* ├─ 100000 ─ <not assigned> (overlaps with CLZ)
* ├─ 100001 ─ <not assigned> (overlaps with CLO)
* ├─ 100010 ─ OPC_MXU_S8LDD
* ├─ 100011 ─ OPC_MXU_S8STD 15..14
* ├─ 100100 ─ OPC_MXU_S8LDI ┌─ 00 ─ OPC_MXU_S32MUL
* ├─ 100101 ─ OPC_MXU_S8SDI ├─ 01 ─ OPC_MXU_S32MULU
* │ ├─ 10 ─ OPC_MXU_S32EXTR
* ├─ 100110 ─ OPC_MXU__POOL15 ─┴─ 11 ─ OPC_MXU_S32EXTRV
* │
* │ 20..18
* ├─ 100111 ─ OPC_MXU__POOL16 ─┬─ 000 ─ OPC_MXU_D32SARW
* │ ├─ 001 ─ OPC_MXU_S32ALN
* │ ├─ 010 ─ OPC_MXU_S32ALNI
* │ ├─ 011 ─ OPC_MXU_S32LUI
* │ ├─ 100 ─ OPC_MXU_S32NOR
* │ ├─ 101 ─ OPC_MXU_S32AND
* │ ├─ 110 ─ OPC_MXU_S32OR
* │ └─ 111 ─ OPC_MXU_S32XOR
* │
* │ 8..6
* ├─ 101000 ─ OPC_MXU__POOL17 ─┬─ 000 ─ OPC_MXU_LXB
* │ ├─ 001 ─ OPC_MXU_LXH
* ├─ 101001 ─ <not assigned> ├─ 011 ─ OPC_MXU_LXW
* ├─ 101010 ─ OPC_MXU_S16LDD ├─ 100 ─ OPC_MXU_LXBU
* ├─ 101011 ─ OPC_MXU_S16STD └─ 101 ─ OPC_MXU_LXHU
* ├─ 101100 ─ OPC_MXU_S16LDI
* ├─ 101101 ─ OPC_MXU_S16SDI
* ├─ 101110 ─ OPC_MXU_S32M2I
* ├─ 101111 ─ OPC_MXU_S32I2M
* ├─ 110000 ─ OPC_MXU_D32SLL
* ├─ 110001 ─ OPC_MXU_D32SLR 20..18
* ├─ 110010 ─ OPC_MXU_D32SARL ┌─ 000 ─ OPC_MXU_D32SLLV
* ├─ 110011 ─ OPC_MXU_D32SAR ├─ 001 ─ OPC_MXU_D32SLRV
* ├─ 110100 ─ OPC_MXU_Q16SLL ├─ 011 ─ OPC_MXU_D32SARV
* ├─ 110101 ─ OPC_MXU_Q16SLR ├─ 100 ─ OPC_MXU_Q16SLLV
* │ ├─ 101 ─ OPC_MXU_Q16SLRV
* ├─ 110110 ─ OPC_MXU__POOL18 ─┴─ 111 ─ OPC_MXU_Q16SARV
* │
* ├─ 110111 ─ OPC_MXU_Q16SAR
* │ 23..22
* ├─ 111000 ─ OPC_MXU__POOL19 ─┬─ 00 ─ OPC_MXU_Q8MUL
* │ └─ 10 ─ OPC_MXU_Q8MULSU
* │
* │ 20..18
* ├─ 111001 ─ OPC_MXU__POOL20 ─┬─ 000 ─ OPC_MXU_Q8MOVZ
* │ ├─ 001 ─ OPC_MXU_Q8MOVN
* │ ├─ 010 ─ OPC_MXU_D16MOVZ
* │ ├─ 011 ─ OPC_MXU_D16MOVN
* │ ├─ 100 ─ OPC_MXU_S32MOVZ
* │ └─ 101 ─ OPC_MXU_S32MOVN
* │
* │ 23..22
* ├─ 111010 ─ OPC_MXU__POOL21 ─┬─ 00 ─ OPC_MXU_Q8MAC
* │ └─ 10 ─ OPC_MXU_Q8MACSU
* ├─ 111011 ─ OPC_MXU_Q16SCOP
* ├─ 111100 ─ OPC_MXU_Q8MADL
* ├─ 111101 ─ OPC_MXU_S32SFL
* ├─ 111110 ─ OPC_MXU_Q8SAD
* └─ 111111 ─ <not assigned> (overlaps with SDBBP)
*
*
* Compiled after:
*
* "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
* Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
*/
enum {
OPC_MXU_S32MADD = 0x00,
OPC_MXU_S32MADDU = 0x01,
OPC_MXU__POOL00 = 0x03,
OPC_MXU_S32MSUB = 0x04,
OPC_MXU_S32MSUBU = 0x05,
OPC_MXU__POOL01 = 0x06,
OPC_MXU__POOL02 = 0x07,
OPC_MXU_D16MUL = 0x08,
OPC_MXU__POOL03 = 0x09,
OPC_MXU_D16MAC = 0x0A,
OPC_MXU_D16MACF = 0x0B,
OPC_MXU_D16MADL = 0x0C,
OPC_MXU_S16MAD = 0x0D,
OPC_MXU_Q16ADD = 0x0E,
OPC_MXU_D16MACE = 0x0F,
OPC_MXU__POOL04 = 0x10,
OPC_MXU__POOL05 = 0x11,
OPC_MXU__POOL06 = 0x12,
OPC_MXU__POOL07 = 0x13,
OPC_MXU__POOL08 = 0x14,
OPC_MXU__POOL09 = 0x15,
OPC_MXU__POOL10 = 0x16,
OPC_MXU__POOL11 = 0x17,
OPC_MXU_D32ADD = 0x18,
OPC_MXU__POOL12 = 0x19,
OPC_MXU__POOL13 = 0x1B,
OPC_MXU__POOL14 = 0x1C,
OPC_MXU_Q8ACCE = 0x1D,
OPC_MXU_S8LDD = 0x22,
OPC_MXU_S8STD = 0x23,
OPC_MXU_S8LDI = 0x24,
OPC_MXU_S8SDI = 0x25,
OPC_MXU__POOL15 = 0x26,
OPC_MXU__POOL16 = 0x27,
OPC_MXU__POOL17 = 0x28,
OPC_MXU_S16LDD = 0x2A,
OPC_MXU_S16STD = 0x2B,
OPC_MXU_S16LDI = 0x2C,
OPC_MXU_S16SDI = 0x2D,
OPC_MXU_S32M2I = 0x2E,
OPC_MXU_S32I2M = 0x2F,
OPC_MXU_D32SLL = 0x30,
OPC_MXU_D32SLR = 0x31,
OPC_MXU_D32SARL = 0x32,
OPC_MXU_D32SAR = 0x33,
OPC_MXU_Q16SLL = 0x34,
OPC_MXU_Q16SLR = 0x35,
OPC_MXU__POOL18 = 0x36,
OPC_MXU_Q16SAR = 0x37,
OPC_MXU__POOL19 = 0x38,
OPC_MXU__POOL20 = 0x39,
OPC_MXU__POOL21 = 0x3A,
OPC_MXU_Q16SCOP = 0x3B,
OPC_MXU_Q8MADL = 0x3C,
OPC_MXU_S32SFL = 0x3D,
OPC_MXU_Q8SAD = 0x3E,
};
/*
* MXU pool 00
*/
enum {
OPC_MXU_S32MAX = 0x00,
OPC_MXU_S32MIN = 0x01,
OPC_MXU_D16MAX = 0x02,
OPC_MXU_D16MIN = 0x03,
OPC_MXU_Q8MAX = 0x04,
OPC_MXU_Q8MIN = 0x05,
OPC_MXU_Q8SLT = 0x06,
OPC_MXU_Q8SLTU = 0x07,
};
/*
* MXU pool 01
*/
enum {
OPC_MXU_S32SLT = 0x00,
OPC_MXU_D16SLT = 0x01,
OPC_MXU_D16AVG = 0x02,
OPC_MXU_D16AVGR = 0x03,
OPC_MXU_Q8AVG = 0x04,
OPC_MXU_Q8AVGR = 0x05,
OPC_MXU_Q8ADD = 0x07,
};
/*
* MXU pool 02
*/
enum {
OPC_MXU_S32CPS = 0x00,
OPC_MXU_D16CPS = 0x02,
OPC_MXU_Q8ABD = 0x04,
OPC_MXU_Q16SAT = 0x06,
};
/*
* MXU pool 03
*/
enum {
OPC_MXU_D16MULF = 0x00,
OPC_MXU_D16MULE = 0x01,
};
/*
* MXU pool 04 05 06 07 08 09 10 11
*/
enum {
OPC_MXU_S32LDST = 0x00,
OPC_MXU_S32LDSTR = 0x01,
};
/*
* MXU pool 12
*/
enum {
OPC_MXU_D32ACC = 0x00,
OPC_MXU_D32ACCM = 0x01,
OPC_MXU_D32ASUM = 0x02,
};
/*
* MXU pool 13
*/
enum {
OPC_MXU_Q16ACC = 0x00,
OPC_MXU_Q16ACCM = 0x01,
OPC_MXU_D16ASUM = 0x02,
};
/*
* MXU pool 14
*/
enum {
OPC_MXU_Q8ADDE = 0x00,
OPC_MXU_D8SUM = 0x01,
OPC_MXU_D8SUMC = 0x02,
};
/*
* MXU pool 15
*/
enum {
OPC_MXU_S32MUL = 0x00,
OPC_MXU_S32MULU = 0x01,
OPC_MXU_S32EXTR = 0x02,
OPC_MXU_S32EXTRV = 0x03,
};
/*
* MXU pool 16
*/
enum {
OPC_MXU_D32SARW = 0x00,
OPC_MXU_S32ALN = 0x01,
OPC_MXU_S32ALNI = 0x02,
OPC_MXU_S32LUI = 0x03,
OPC_MXU_S32NOR = 0x04,
OPC_MXU_S32AND = 0x05,
OPC_MXU_S32OR = 0x06,
OPC_MXU_S32XOR = 0x07,
};
/*
* MXU pool 17
*/
enum {
OPC_MXU_LXB = 0x00,
OPC_MXU_LXH = 0x01,
OPC_MXU_LXW = 0x03,
OPC_MXU_LXBU = 0x04,
OPC_MXU_LXHU = 0x05,
};
/*
* MXU pool 18
*/
enum {
OPC_MXU_D32SLLV = 0x00,
OPC_MXU_D32SLRV = 0x01,
OPC_MXU_D32SARV = 0x03,
OPC_MXU_Q16SLLV = 0x04,
OPC_MXU_Q16SLRV = 0x05,
OPC_MXU_Q16SARV = 0x07,
};
/*
* MXU pool 19
*/
enum {
OPC_MXU_Q8MUL = 0x00,
OPC_MXU_Q8MULSU = 0x02,
};
/*
* MXU pool 20
*/
enum {
OPC_MXU_Q8MOVZ = 0x00,
OPC_MXU_Q8MOVN = 0x01,
OPC_MXU_D16MOVZ = 0x02,
OPC_MXU_D16MOVN = 0x03,
OPC_MXU_S32MOVZ = 0x04,
OPC_MXU_S32MOVN = 0x05,
};
/*
* MXU pool 21
*/
enum {
OPC_MXU_Q8MAC = 0x00,
OPC_MXU_Q8MACSU = 0x02,
};
/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
#define MXU_APTN1_A 0
#define MXU_APTN1_S 1
/* MXU accumulate add/subtract 2-bit pattern 'aptn2' */
#define MXU_APTN2_AA 0
#define MXU_APTN2_AS 1
#define MXU_APTN2_SA 2
#define MXU_APTN2_SS 3
/* MXU execute add/subtract 2-bit pattern 'eptn2' */
#define MXU_EPTN2_AA 0
#define MXU_EPTN2_AS 1
#define MXU_EPTN2_SA 2
#define MXU_EPTN2_SS 3
/* MXU operand getting pattern 'optn2' */
#define MXU_OPTN2_PTN0 0
#define MXU_OPTN2_PTN1 1
#define MXU_OPTN2_PTN2 2
#define MXU_OPTN2_PTN3 3
/* alternative naming scheme for 'optn2' */
#define MXU_OPTN2_WW 0
#define MXU_OPTN2_LW 1
#define MXU_OPTN2_HW 2
#define MXU_OPTN2_XW 3
/* MXU operand getting pattern 'optn3' */
#define MXU_OPTN3_PTN0 0
#define MXU_OPTN3_PTN1 1
#define MXU_OPTN3_PTN2 2
#define MXU_OPTN3_PTN3 3
#define MXU_OPTN3_PTN4 4
#define MXU_OPTN3_PTN5 5
#define MXU_OPTN3_PTN6 6
#define MXU_OPTN3_PTN7 7
/* MXU registers */
static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
static TCGv mxu_CR;
static const char mxuregnames[NUMBER_OF_MXU_REGISTERS][4] = {
"XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8",
"XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "XCR",
};
void mxu_translate_init(void)
{
for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
mxu_gpr[i] = tcg_global_mem_new(cpu_env,
offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
mxuregnames[i]);
}
mxu_CR = tcg_global_mem_new(cpu_env,
offsetof(CPUMIPSState, active_tc.mxu_cr),
mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
}
/* MXU General purpose registers moves. */
static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
{
if (reg == 0) {
tcg_gen_movi_tl(t, 0);
} else if (reg <= 15) {
tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
}
}
static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
{
if (reg > 0 && reg <= 15) {
tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
}
}
static inline void gen_extract_mxu_gpr(TCGv t, unsigned int reg,
unsigned int ofs, unsigned int len)
{
if (reg == 0) {
tcg_gen_movi_tl(t, 0);
} else if (reg <= 15) {
tcg_gen_extract_tl(t, mxu_gpr[reg - 1], ofs, len);
}
}
/* MXU control register moves. */
static inline void gen_load_mxu_cr(TCGv t)
{
tcg_gen_mov_tl(t, mxu_CR);
}
static inline void gen_store_mxu_cr(TCGv t)
{
/* TODO: Add handling of RW rules for MXU_CR. */
tcg_gen_mov_tl(mxu_CR, t);
}
/*
* S32I2M XRa, rb - Register move from GRF to XRF
*/
static void gen_mxu_s32i2m(DisasContext *ctx)
{
TCGv t0;
uint32_t XRa, Rb;
t0 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 5);
Rb = extract32(ctx->opcode, 16, 5);
gen_load_gpr(t0, Rb);
if (XRa <= 15) {
gen_store_mxu_gpr(t0, XRa);
} else if (XRa == 16) {
gen_store_mxu_cr(t0);
}
}
/*
* S32M2I XRa, rb - Register move from XRF to GRF
*/
static void gen_mxu_s32m2i(DisasContext *ctx)
{
TCGv t0;
uint32_t XRa, Rb;
t0 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 5);
Rb = extract32(ctx->opcode, 16, 5);
if (XRa <= 15) {
gen_load_mxu_gpr(t0, XRa);
} else if (XRa == 16) {
gen_load_mxu_cr(t0);
}
gen_store_gpr(t0, Rb);
}
/*
* S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
*
* S8LDI XRa, Rb, s8, optn3 - Load a byte from memory to XRF,
* post modify address register
*/
static void gen_mxu_s8ldd(DisasContext *ctx, bool postmodify)
{
TCGv t0, t1;
uint32_t XRa, Rb, s8, optn3;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s8 = extract32(ctx->opcode, 10, 8);
optn3 = extract32(ctx->opcode, 18, 3);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
tcg_gen_addi_tl(t0, t0, (int8_t)s8);
if (postmodify) {
gen_store_gpr(t0, Rb);
}
switch (optn3) {
/* XRa[7:0] = tmp8 */
case MXU_OPTN3_PTN0:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
break;
/* XRa[15:8] = tmp8 */
case MXU_OPTN3_PTN1:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
break;
/* XRa[23:16] = tmp8 */
case MXU_OPTN3_PTN2:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
break;
/* XRa[31:24] = tmp8 */
case MXU_OPTN3_PTN3:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
break;
/* XRa = {8'b0, tmp8, 8'b0, tmp8} */
case MXU_OPTN3_PTN4:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
break;
/* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
case MXU_OPTN3_PTN5:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
tcg_gen_shli_tl(t1, t1, 8);
tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
break;
/* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
case MXU_OPTN3_PTN6:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
tcg_gen_mov_tl(t0, t1);
tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
tcg_gen_shli_tl(t1, t1, 16);
tcg_gen_or_tl(t0, t0, t1);
break;
/* XRa = {tmp8, tmp8, tmp8, tmp8} */
case MXU_OPTN3_PTN7:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
break;
}
gen_store_mxu_gpr(t0, XRa);
}
/*
* S8STD XRa, Rb, s8, optn3 - Store a byte from XRF to memory
*
* S8SDI XRa, Rb, s8, optn3 - Store a byte from XRF to memory,
* post modify address register
*/
static void gen_mxu_s8std(DisasContext *ctx, bool postmodify)
{
TCGv t0, t1;
uint32_t XRa, Rb, s8, optn3;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s8 = extract32(ctx->opcode, 10, 8);
optn3 = extract32(ctx->opcode, 18, 3);
Rb = extract32(ctx->opcode, 21, 5);
if (optn3 > 3) {
/* reserved, do nothing */
return;
}
gen_load_gpr(t0, Rb);
tcg_gen_addi_tl(t0, t0, (int8_t)s8);
if (postmodify) {
gen_store_gpr(t0, Rb);
}
gen_load_mxu_gpr(t1, XRa);
switch (optn3) {
/* XRa[7:0] => tmp8 */
case MXU_OPTN3_PTN0:
tcg_gen_extract_tl(t1, t1, 0, 8);
break;
/* XRa[15:8] => tmp8 */
case MXU_OPTN3_PTN1:
tcg_gen_extract_tl(t1, t1, 8, 8);
break;
/* XRa[23:16] => tmp8 */
case MXU_OPTN3_PTN2:
tcg_gen_extract_tl(t1, t1, 16, 8);
break;
/* XRa[31:24] => tmp8 */
case MXU_OPTN3_PTN3:
tcg_gen_extract_tl(t1, t1, 24, 8);
break;
}
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UB);
}
/*
* S16LDD XRa, Rb, s10, optn2 - Load a halfword from memory to XRF
*
* S16LDI XRa, Rb, s10, optn2 - Load a halfword from memory to XRF,
* post modify address register
*/
static void gen_mxu_s16ldd(DisasContext *ctx, bool postmodify)
{
TCGv t0, t1;
uint32_t XRa, Rb, optn2;
int32_t s10;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s10 = sextract32(ctx->opcode, 10, 9) * 2;
optn2 = extract32(ctx->opcode, 19, 2);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
tcg_gen_addi_tl(t0, t0, s10);
if (postmodify) {
gen_store_gpr(t0, Rb);
}
switch (optn2) {
/* XRa[15:0] = tmp16 */
case MXU_OPTN2_PTN0:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 0, 16);
break;
/* XRa[31:16] = tmp16 */
case MXU_OPTN2_PTN1:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
break;
/* XRa = sign_extend(tmp16) */
case MXU_OPTN2_PTN2:
tcg_gen_qemu_ld_tl(t0, t0, ctx->mem_idx, MO_SW);
break;
/* XRa = {tmp16, tmp16} */
case MXU_OPTN2_PTN3:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UW);
tcg_gen_deposit_tl(t0, t1, t1, 0, 16);
tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
break;
}
gen_store_mxu_gpr(t0, XRa);
}
/*
* S16STD XRa, Rb, s8, optn2 - Store a byte from XRF to memory
*
* S16SDI XRa, Rb, s8, optn2 - Store a byte from XRF to memory,
* post modify address register
*/
static void gen_mxu_s16std(DisasContext *ctx, bool postmodify)
{
TCGv t0, t1;
uint32_t XRa, Rb, optn2;
int32_t s10;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s10 = sextract32(ctx->opcode, 10, 9) * 2;
optn2 = extract32(ctx->opcode, 19, 2);
Rb = extract32(ctx->opcode, 21, 5);
if (optn2 > 1) {
/* reserved, do nothing */
return;
}
gen_load_gpr(t0, Rb);
tcg_gen_addi_tl(t0, t0, s10);
if (postmodify) {
gen_store_gpr(t0, Rb);
}
gen_load_mxu_gpr(t1, XRa);
switch (optn2) {
/* XRa[15:0] => tmp16 */
case MXU_OPTN2_PTN0:
tcg_gen_extract_tl(t1, t1, 0, 16);
break;
/* XRa[31:16] => tmp16 */
case MXU_OPTN2_PTN1:
tcg_gen_extract_tl(t1, t1, 16, 16);
break;
}
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx, MO_UW);
}
/*
* S32MUL XRa, XRd, rs, rt - Signed 32x32=>64 bit multiplication
* of GPR's and stores result into pair of MXU registers.
* It strains HI and LO registers.
*
* S32MULU XRa, XRd, rs, rt - Unsigned 32x32=>64 bit multiplication
* of GPR's and stores result into pair of MXU registers.
* It strains HI and LO registers.
*/
static void gen_mxu_s32mul(DisasContext *ctx, bool mulu)
{
TCGv t0, t1;
uint32_t XRa, XRd, rs, rt;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRd = extract32(ctx->opcode, 10, 4);
rs = extract32(ctx->opcode, 16, 5);
rt = extract32(ctx->opcode, 21, 5);
if (unlikely(rs == 0 || rt == 0)) {
tcg_gen_movi_tl(t0, 0);
tcg_gen_movi_tl(t1, 0);
} else {
gen_load_gpr(t0, rs);
gen_load_gpr(t1, rt);
if (mulu) {
tcg_gen_mulu2_tl(t0, t1, t0, t1);
} else {
tcg_gen_muls2_tl(t0, t1, t0, t1);
}
}
tcg_gen_mov_tl(cpu_HI[0], t1);
tcg_gen_mov_tl(cpu_LO[0], t0);
gen_store_mxu_gpr(t1, XRa);
gen_store_mxu_gpr(t0, XRd);
}
/*
* D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
* D16MULF XRa, XRb, XRc, optn2 - Signed Q15 fraction pattern multiplication
* with rounding and packing result
* D16MULE XRa, XRb, XRc, XRd, optn2 - Signed Q15 fraction pattern
* multiplication with rounding
*/
static void gen_mxu_d16mul(DisasContext *ctx, bool fractional,
bool packed_result)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
/*
* TODO: XRd field isn't used for D16MULF
* There's no knowledge how this field affect
* instruction decoding/behavior
*/
gen_load_mxu_gpr(t1, XRb);
tcg_gen_sextract_tl(t0, t1, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
gen_load_mxu_gpr(t3, XRc);
tcg_gen_sextract_tl(t2, t3, 0, 16);
tcg_gen_sextract_tl(t3, t3, 16, 16);
switch (optn2) {
case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
}
if (fractional) {
TCGLabel *l_done = gen_new_label();
TCGv rounding = tcg_temp_new();
tcg_gen_shli_tl(t3, t3, 1);
tcg_gen_shli_tl(t2, t2, 1);
tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
if (packed_result) {
TCGLabel *l_apply_bias_l = gen_new_label();
TCGLabel *l_apply_bias_r = gen_new_label();
TCGLabel *l_half_done = gen_new_label();
TCGv bias = tcg_temp_new();
/*
* D16MULF supports unbiased rounding aka "bankers rounding",
* "round to even", "convergent rounding"
*/
tcg_gen_andi_tl(bias, mxu_CR, 0x4);
tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
tcg_gen_andi_tl(t0, t3, 0x1ffff);
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
gen_set_label(l_apply_bias_l);
tcg_gen_addi_tl(t3, t3, 0x8000);
gen_set_label(l_half_done);
tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
tcg_gen_andi_tl(t0, t2, 0x1ffff);
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
gen_set_label(l_apply_bias_r);
tcg_gen_addi_tl(t2, t2, 0x8000);
} else {
/* D16MULE doesn't support unbiased rounding */
tcg_gen_addi_tl(t3, t3, 0x8000);
tcg_gen_addi_tl(t2, t2, 0x8000);
}
gen_set_label(l_done);
}
if (!packed_result) {
gen_store_mxu_gpr(t3, XRa);
gen_store_mxu_gpr(t2, XRd);
} else {
tcg_gen_andi_tl(t3, t3, 0xffff0000);
tcg_gen_shri_tl(t2, t2, 16);
tcg_gen_or_tl(t3, t3, t2);
gen_store_mxu_gpr(t3, XRa);
}
}
/*
* D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
* Signed 16 bit pattern multiply and accumulate
* D16MACF XRa, XRb, XRc, aptn2, optn2
* Signed Q15 fraction pattern multiply accumulate and pack
* D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
* Signed Q15 fraction pattern multiply and accumulate
*/
static void gen_mxu_d16mac(DisasContext *ctx, bool fractional,
bool packed_result)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t1, XRb);
tcg_gen_sextract_tl(t0, t1, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
gen_load_mxu_gpr(t3, XRc);
tcg_gen_sextract_tl(t2, t3, 0, 16);
tcg_gen_sextract_tl(t3, t3, 16, 16);
switch (optn2) {
case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
}
if (fractional) {
tcg_gen_shli_tl(t3, t3, 1);
tcg_gen_shli_tl(t2, t2, 1);
}
gen_load_mxu_gpr(t0, XRa);
gen_load_mxu_gpr(t1, XRd);
switch (aptn2) {
case MXU_APTN2_AA:
tcg_gen_add_tl(t3, t0, t3);
tcg_gen_add_tl(t2, t1, t2);
break;
case MXU_APTN2_AS:
tcg_gen_add_tl(t3, t0, t3);
tcg_gen_sub_tl(t2, t1, t2);
break;
case MXU_APTN2_SA:
tcg_gen_sub_tl(t3, t0, t3);
tcg_gen_add_tl(t2, t1, t2);
break;
case MXU_APTN2_SS:
tcg_gen_sub_tl(t3, t0, t3);
tcg_gen_sub_tl(t2, t1, t2);
break;
}
if (fractional) {
TCGLabel *l_done = gen_new_label();
TCGv rounding = tcg_temp_new();
tcg_gen_andi_tl(rounding, mxu_CR, 0x2);
tcg_gen_brcondi_tl(TCG_COND_EQ, rounding, 0, l_done);
if (packed_result) {
TCGLabel *l_apply_bias_l = gen_new_label();
TCGLabel *l_apply_bias_r = gen_new_label();
TCGLabel *l_half_done = gen_new_label();
TCGv bias = tcg_temp_new();
/*
* D16MACF supports unbiased rounding aka "bankers rounding",
* "round to even", "convergent rounding"
*/
tcg_gen_andi_tl(bias, mxu_CR, 0x4);
tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_l);
tcg_gen_andi_tl(t0, t3, 0x1ffff);
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_half_done);
gen_set_label(l_apply_bias_l);
tcg_gen_addi_tl(t3, t3, 0x8000);
gen_set_label(l_half_done);
tcg_gen_brcondi_tl(TCG_COND_NE, bias, 0, l_apply_bias_r);
tcg_gen_andi_tl(t0, t2, 0x1ffff);
tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0x8000, l_done);
gen_set_label(l_apply_bias_r);
tcg_gen_addi_tl(t2, t2, 0x8000);
} else {
/* D16MACE doesn't support unbiased rounding */
tcg_gen_addi_tl(t3, t3, 0x8000);
tcg_gen_addi_tl(t2, t2, 0x8000);
}
gen_set_label(l_done);
}
if (!packed_result) {
gen_store_mxu_gpr(t3, XRa);
gen_store_mxu_gpr(t2, XRd);
} else {
tcg_gen_andi_tl(t3, t3, 0xffff0000);
tcg_gen_shri_tl(t2, t2, 16);
tcg_gen_or_tl(t3, t3, t2);
gen_store_mxu_gpr(t3, XRa);
}
}
/*
* D16MADL XRa, XRb, XRc, XRd, aptn2, optn2 - Double packed
* unsigned 16 bit pattern multiply and add/subtract.
*/
static void gen_mxu_d16madl(DisasContext *ctx)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t1, XRb);
tcg_gen_sextract_tl(t0, t1, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
gen_load_mxu_gpr(t3, XRc);
tcg_gen_sextract_tl(t2, t3, 0, 16);
tcg_gen_sextract_tl(t3, t3, 16, 16);
switch (optn2) {
case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
}
tcg_gen_extract_tl(t2, t2, 0, 16);
tcg_gen_extract_tl(t3, t3, 0, 16);
gen_load_mxu_gpr(t1, XRa);
tcg_gen_extract_tl(t0, t1, 0, 16);
tcg_gen_extract_tl(t1, t1, 16, 16);
switch (aptn2) {
case MXU_APTN2_AA:
tcg_gen_add_tl(t3, t1, t3);
tcg_gen_add_tl(t2, t0, t2);
break;
case MXU_APTN2_AS:
tcg_gen_add_tl(t3, t1, t3);
tcg_gen_sub_tl(t2, t0, t2);
break;
case MXU_APTN2_SA:
tcg_gen_sub_tl(t3, t1, t3);
tcg_gen_add_tl(t2, t0, t2);
break;
case MXU_APTN2_SS:
tcg_gen_sub_tl(t3, t1, t3);
tcg_gen_sub_tl(t2, t0, t2);
break;
}
tcg_gen_andi_tl(t2, t2, 0xffff);
tcg_gen_shli_tl(t3, t3, 16);
tcg_gen_or_tl(mxu_gpr[XRd - 1], t3, t2);
}
/*
* S16MAD XRa, XRb, XRc, XRd, aptn2, optn2 - Single packed
* signed 16 bit pattern multiply and 32-bit add/subtract.
*/
static void gen_mxu_s16mad(DisasContext *ctx)
{
TCGv t0, t1;
uint32_t XRa, XRb, XRc, XRd, optn2, aptn1, pad;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
aptn1 = extract32(ctx->opcode, 24, 1);
pad = extract32(ctx->opcode, 25, 1);
if (pad) {
/* FIXME check if it influence the result */
}
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
switch (optn2) {
case MXU_OPTN2_WW: /* XRB.H*XRC.H */
tcg_gen_sextract_tl(t0, t0, 16, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
break;
case MXU_OPTN2_LW: /* XRB.L*XRC.L */
tcg_gen_sextract_tl(t0, t0, 0, 16);
tcg_gen_sextract_tl(t1, t1, 0, 16);
break;
case MXU_OPTN2_HW: /* XRB.H*XRC.L */
tcg_gen_sextract_tl(t0, t0, 16, 16);
tcg_gen_sextract_tl(t1, t1, 0, 16);
break;
case MXU_OPTN2_XW: /* XRB.L*XRC.H */
tcg_gen_sextract_tl(t0, t0, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
break;
}
tcg_gen_mul_tl(t0, t0, t1);
gen_load_mxu_gpr(t1, XRa);
switch (aptn1) {
case MXU_APTN1_A:
tcg_gen_add_tl(t1, t1, t0);
break;
case MXU_APTN1_S:
tcg_gen_sub_tl(t1, t1, t0);
break;
}
gen_store_mxu_gpr(t1, XRd);
}
/*
* Q8MUL XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
* Q8MULSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
* Q8MAC XRa, XRb, XRc, XRd - Parallel quad unsigned 8 bit multiply
* and accumulate
* Q8MACSU XRa, XRb, XRc, XRd - Parallel quad signed 8 bit multiply
* and accumulate
*/
static void gen_mxu_q8mul_mac(DisasContext *ctx, bool su, bool mac)
{
TCGv t0, t1, t2, t3, t4, t5, t6, t7;
uint32_t XRa, XRb, XRc, XRd, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
t4 = tcg_temp_new();
t5 = tcg_temp_new();
t6 = tcg_temp_new();
t7 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t7, XRc);
if (su) {
/* Q8MULSU / Q8MACSU */
tcg_gen_sextract_tl(t0, t3, 0, 8);
tcg_gen_sextract_tl(t1, t3, 8, 8);
tcg_gen_sextract_tl(t2, t3, 16, 8);
tcg_gen_sextract_tl(t3, t3, 24, 8);
} else {
/* Q8MUL / Q8MAC */
tcg_gen_extract_tl(t0, t3, 0, 8);
tcg_gen_extract_tl(t1, t3, 8, 8);
tcg_gen_extract_tl(t2, t3, 16, 8);
tcg_gen_extract_tl(t3, t3, 24, 8);
}
tcg_gen_extract_tl(t4, t7, 0, 8);
tcg_gen_extract_tl(t5, t7, 8, 8);
tcg_gen_extract_tl(t6, t7, 16, 8);
tcg_gen_extract_tl(t7, t7, 24, 8);
tcg_gen_mul_tl(t0, t0, t4);
tcg_gen_mul_tl(t1, t1, t5);
tcg_gen_mul_tl(t2, t2, t6);
tcg_gen_mul_tl(t3, t3, t7);
if (mac) {
gen_load_mxu_gpr(t4, XRd);
gen_load_mxu_gpr(t5, XRa);
tcg_gen_extract_tl(t6, t4, 0, 16);
tcg_gen_extract_tl(t7, t4, 16, 16);
if (aptn2 & 1) {
tcg_gen_sub_tl(t0, t6, t0);
tcg_gen_sub_tl(t1, t7, t1);
} else {
tcg_gen_add_tl(t0, t6, t0);
tcg_gen_add_tl(t1, t7, t1);
}
tcg_gen_extract_tl(t6, t5, 0, 16);
tcg_gen_extract_tl(t7, t5, 16, 16);
if (aptn2 & 2) {
tcg_gen_sub_tl(t2, t6, t2);
tcg_gen_sub_tl(t3, t7, t3);
} else {
tcg_gen_add_tl(t2, t6, t2);
tcg_gen_add_tl(t3, t7, t3);
}
}
tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
tcg_gen_deposit_tl(t1, t2, t3, 16, 16);
gen_store_mxu_gpr(t0, XRd);
gen_store_mxu_gpr(t1, XRa);
}
/*
* Q8MADL XRd, XRa, XRb, XRc
* Parallel quad unsigned 8 bit multiply and accumulate.
* e.g. XRd[0..3] = XRa[0..3] + XRb[0..3] * XRc[0..3]
*/
static void gen_mxu_q8madl(DisasContext *ctx)
{
TCGv t0, t1, t2, t3, t4, t5, t6, t7;
uint32_t XRa, XRb, XRc, XRd, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
t4 = tcg_temp_new();
t5 = tcg_temp_new();
t6 = tcg_temp_new();
t7 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t7, XRc);
tcg_gen_extract_tl(t0, t3, 0, 8);
tcg_gen_extract_tl(t1, t3, 8, 8);
tcg_gen_extract_tl(t2, t3, 16, 8);
tcg_gen_extract_tl(t3, t3, 24, 8);
tcg_gen_extract_tl(t4, t7, 0, 8);
tcg_gen_extract_tl(t5, t7, 8, 8);
tcg_gen_extract_tl(t6, t7, 16, 8);
tcg_gen_extract_tl(t7, t7, 24, 8);
tcg_gen_mul_tl(t0, t0, t4);
tcg_gen_mul_tl(t1, t1, t5);
tcg_gen_mul_tl(t2, t2, t6);
tcg_gen_mul_tl(t3, t3, t7);
gen_load_mxu_gpr(t4, XRa);
tcg_gen_extract_tl(t6, t4, 0, 8);
tcg_gen_extract_tl(t7, t4, 8, 8);
if (aptn2 & 1) {
tcg_gen_sub_tl(t0, t6, t0);
tcg_gen_sub_tl(t1, t7, t1);
} else {
tcg_gen_add_tl(t0, t6, t0);
tcg_gen_add_tl(t1, t7, t1);
}
tcg_gen_extract_tl(t6, t4, 16, 8);
tcg_gen_extract_tl(t7, t4, 24, 8);
if (aptn2 & 2) {
tcg_gen_sub_tl(t2, t6, t2);
tcg_gen_sub_tl(t3, t7, t3);
} else {
tcg_gen_add_tl(t2, t6, t2);
tcg_gen_add_tl(t3, t7, t3);
}
tcg_gen_andi_tl(t5, t0, 0xff);
tcg_gen_deposit_tl(t5, t5, t1, 8, 8);
tcg_gen_deposit_tl(t5, t5, t2, 16, 8);
tcg_gen_deposit_tl(t5, t5, t3, 24, 8);
gen_store_mxu_gpr(t5, XRd);
}
/*
* S32LDD XRa, Rb, S12 - Load a word from memory to XRF
* S32LDDR XRa, Rb, S12 - Load a word from memory to XRF
* in reversed byte seq.
* S32LDI XRa, Rb, S12 - Load a word from memory to XRF,
* post modify base address GPR.
* S32LDIR XRa, Rb, S12 - Load a word from memory to XRF,
* post modify base address GPR and load in reversed byte seq.
*/
static void gen_mxu_s32ldxx(DisasContext *ctx, bool reversed, bool postinc)
{
TCGv t0, t1;
uint32_t XRa, Rb, s12;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s12 = sextract32(ctx->opcode, 10, 10);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
tcg_gen_movi_tl(t1, s12 * 4);
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
(MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
ctx->default_tcg_memop_mask);
gen_store_mxu_gpr(t1, XRa);
if (postinc) {
gen_store_gpr(t0, Rb);
}
}
/*
* S32STD XRa, Rb, S12 - Store a word from XRF to memory
* S32STDR XRa, Rb, S12 - Store a word from XRF to memory
* in reversed byte seq.
* S32SDI XRa, Rb, S12 - Store a word from XRF to memory,
* post modify base address GPR.
* S32SDIR XRa, Rb, S12 - Store a word from XRF to memory,
* post modify base address GPR and store in reversed byte seq.
*/
static void gen_mxu_s32stxx(DisasContext *ctx, bool reversed, bool postinc)
{
TCGv t0, t1;
uint32_t XRa, Rb, s12;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s12 = sextract32(ctx->opcode, 10, 10);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
tcg_gen_movi_tl(t1, s12 * 4);
tcg_gen_add_tl(t0, t0, t1);
gen_load_mxu_gpr(t1, XRa);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
(MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
ctx->default_tcg_memop_mask);
if (postinc) {
gen_store_gpr(t0, Rb);
}
}
/*
* S32LDDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
* S32LDDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
* in reversed byte seq.
* S32LDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
* post modify base address GPR.
* S32LDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
* post modify base address GPR and load in reversed byte seq.
*/
static void gen_mxu_s32ldxvx(DisasContext *ctx, bool reversed,
bool postinc, uint32_t strd2)
{
TCGv t0, t1;
uint32_t XRa, Rb, Rc;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
Rc = extract32(ctx->opcode, 16, 5);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
gen_load_gpr(t1, Rc);
tcg_gen_shli_tl(t1, t1, strd2);
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx,
(MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
ctx->default_tcg_memop_mask);
gen_store_mxu_gpr(t1, XRa);
if (postinc) {
gen_store_gpr(t0, Rb);
}
}
/*
* LXW Ra, Rb, Rc, STRD2 - Load a word from memory to GPR
* LXB Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
* sign extending to GPR size.
* LXH Ra, Rb, Rc, STRD2 - Load a byte from memory to GPR,
* sign extending to GPR size.
* LXBU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
* zero extending to GPR size.
* LXHU Ra, Rb, Rc, STRD2 - Load a halfword from memory to GPR,
* zero extending to GPR size.
*/
static void gen_mxu_lxx(DisasContext *ctx, uint32_t strd2, MemOp mop)
{
TCGv t0, t1;
uint32_t Ra, Rb, Rc;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
Ra = extract32(ctx->opcode, 11, 5);
Rc = extract32(ctx->opcode, 16, 5);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
gen_load_gpr(t1, Rc);
tcg_gen_shli_tl(t1, t1, strd2);
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, mop | ctx->default_tcg_memop_mask);
gen_store_gpr(t1, Ra);
}
/*
* S32STDV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
* S32STDVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF
* in reversed byte seq.
* S32SDIV XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
* post modify base address GPR.
* S32SDIVR XRa, Rb, Rc, STRD2 - Load a word from memory to XRF,
* post modify base address GPR and store in reversed byte seq.
*/
static void gen_mxu_s32stxvx(DisasContext *ctx, bool reversed,
bool postinc, uint32_t strd2)
{
TCGv t0, t1;
uint32_t XRa, Rb, Rc;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
Rc = extract32(ctx->opcode, 16, 5);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
gen_load_gpr(t1, Rc);
tcg_gen_shli_tl(t1, t1, strd2);
tcg_gen_add_tl(t0, t0, t1);
gen_load_mxu_gpr(t1, XRa);
tcg_gen_qemu_st_tl(t1, t0, ctx->mem_idx,
(MO_TESL ^ (reversed ? MO_BSWAP : 0)) |
ctx->default_tcg_memop_mask);
if (postinc) {
gen_store_gpr(t0, Rb);
}
}
/*
* MXU instruction category: logic
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32NOR S32AND S32OR S32XOR
*/
/*
* S32NOR XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'nor' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32NOR(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 1s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just set destination to the negation of XRc */
tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
} else if (unlikely(XRc == 0)) {
/* XRa zero register -> just set destination to the negation of XRb */
tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to the negation of XRb */
tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* S32AND XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'and' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32AND(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) || (XRc == 0))) {
/* one of operands zero register -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* S32OR XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'or' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32OR(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just set destination to the content of XRc */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
} else if (unlikely(XRc == 0)) {
/* XRc zero register -> just set destination to the content of XRb */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* S32XOR XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'xor' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32XOR(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just set destination to the content of XRc */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
} else if (unlikely(XRc == 0)) {
/* XRc zero register -> just set destination to the content of XRb */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* MXU instruction category: shift
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* D32SLL D32SLR D32SAR D32SARL
* D32SLLV D32SLRV D32SARV D32SARW
* Q16SLL Q16SLR Q16SAR
* Q16SLLV Q16SLRV Q16SARV
*/
/*
* D32SLL XRa, XRd, XRb, XRc, SFT4
* Dual 32-bit shift left from XRb and XRc to SFT4
* bits (0..15). Store to XRa and XRd respectively.
* D32SLR XRa, XRd, XRb, XRc, SFT4
* Dual 32-bit shift logic right from XRb and XRc
* to SFT4 bits (0..15). Store to XRa and XRd respectively.
* D32SAR XRa, XRd, XRb, XRc, SFT4
* Dual 32-bit shift arithmetic right from XRb and XRc
* to SFT4 bits (0..15). Store to XRa and XRd respectively.
*/
static void gen_mxu_d32sxx(DisasContext *ctx, bool right, bool arithmetic)
{
uint32_t XRa, XRb, XRc, XRd, sft4;
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
sft4 = extract32(ctx->opcode, 22, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
if (right) {
if (arithmetic) {
tcg_gen_sari_tl(t0, t0, sft4);
tcg_gen_sari_tl(t1, t1, sft4);
} else {
tcg_gen_shri_tl(t0, t0, sft4);
tcg_gen_shri_tl(t1, t1, sft4);
}
} else {
tcg_gen_shli_tl(t0, t0, sft4);
tcg_gen_shli_tl(t1, t1, sft4);
}
gen_store_mxu_gpr(t0, XRa);
gen_store_mxu_gpr(t1, XRd);
}
/*
* D32SLLV XRa, XRd, rs
* Dual 32-bit shift left from XRa and XRd to rs[3:0]
* bits. Store back to XRa and XRd respectively.
* D32SLRV XRa, XRd, rs
* Dual 32-bit shift logic right from XRa and XRd to rs[3:0]
* bits. Store back to XRa and XRd respectively.
* D32SARV XRa, XRd, rs
* Dual 32-bit shift arithmetic right from XRa and XRd to rs[3:0]
* bits. Store back to XRa and XRd respectively.
*/
static void gen_mxu_d32sxxv(DisasContext *ctx, bool right, bool arithmetic)
{
uint32_t XRa, XRd, rs;
XRa = extract32(ctx->opcode, 10, 4);
XRd = extract32(ctx->opcode, 14, 4);
rs = extract32(ctx->opcode, 21, 5);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
gen_load_mxu_gpr(t0, XRa);
gen_load_mxu_gpr(t1, XRd);
gen_load_gpr(t2, rs);
tcg_gen_andi_tl(t2, t2, 0x0f);
if (right) {
if (arithmetic) {
tcg_gen_sar_tl(t0, t0, t2);
tcg_gen_sar_tl(t1, t1, t2);
} else {
tcg_gen_shr_tl(t0, t0, t2);
tcg_gen_shr_tl(t1, t1, t2);
}
} else {
tcg_gen_shl_tl(t0, t0, t2);
tcg_gen_shl_tl(t1, t1, t2);
}
gen_store_mxu_gpr(t0, XRa);
gen_store_mxu_gpr(t1, XRd);
}
/*
* D32SARL XRa, XRb, XRc, SFT4
* Dual shift arithmetic right 32-bit integers in XRb and XRc
* to SFT4 bits (0..15). Pack 16 LSBs of each into XRa.
*
* D32SARW XRa, XRb, XRc, rb
* Dual shift arithmetic right 32-bit integers in XRb and XRc
* to rb[3:0] bits. Pack 16 LSBs of each into XRa.
*/
static void gen_mxu_d32sarl(DisasContext *ctx, bool sarw)
{
uint32_t XRa, XRb, XRc, rb;
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
rb = extract32(ctx->opcode, 21, 5);
if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else {
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
if (!sarw) {
/* Make SFT4 from rb field */
tcg_gen_movi_tl(t2, rb >> 1);
} else {
gen_load_gpr(t2, rb);
tcg_gen_andi_tl(t2, t2, 0x0f);
}
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
tcg_gen_sar_tl(t0, t0, t2);
tcg_gen_sar_tl(t1, t1, t2);
tcg_gen_extract_tl(t2, t1, 0, 16);
tcg_gen_deposit_tl(t2, t2, t0, 16, 16);
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* Q16SLL XRa, XRd, XRb, XRc, SFT4
* Quad 16-bit shift left from XRb and XRc to SFT4
* bits (0..15). Store to XRa and XRd respectively.
* Q16SLR XRa, XRd, XRb, XRc, SFT4
* Quad 16-bit shift logic right from XRb and XRc
* to SFT4 bits (0..15). Store to XRa and XRd respectively.
* Q16SAR XRa, XRd, XRb, XRc, SFT4
* Quad 16-bit shift arithmetic right from XRb and XRc
* to SFT4 bits (0..15). Store to XRa and XRd respectively.
*/
static void gen_mxu_q16sxx(DisasContext *ctx, bool right, bool arithmetic)
{
uint32_t XRa, XRb, XRc, XRd, sft4;
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
sft4 = extract32(ctx->opcode, 22, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t2, XRc);
if (arithmetic) {
tcg_gen_sextract_tl(t1, t0, 16, 16);
tcg_gen_sextract_tl(t0, t0, 0, 16);
tcg_gen_sextract_tl(t3, t2, 16, 16);
tcg_gen_sextract_tl(t2, t2, 0, 16);
} else {
tcg_gen_extract_tl(t1, t0, 16, 16);
tcg_gen_extract_tl(t0, t0, 0, 16);
tcg_gen_extract_tl(t3, t2, 16, 16);
tcg_gen_extract_tl(t2, t2, 0, 16);
}
if (right) {
if (arithmetic) {
tcg_gen_sari_tl(t0, t0, sft4);
tcg_gen_sari_tl(t1, t1, sft4);
tcg_gen_sari_tl(t2, t2, sft4);
tcg_gen_sari_tl(t3, t3, sft4);
} else {
tcg_gen_shri_tl(t0, t0, sft4);
tcg_gen_shri_tl(t1, t1, sft4);
tcg_gen_shri_tl(t2, t2, sft4);
tcg_gen_shri_tl(t3, t3, sft4);
}
} else {
tcg_gen_shli_tl(t0, t0, sft4);
tcg_gen_shli_tl(t1, t1, sft4);
tcg_gen_shli_tl(t2, t2, sft4);
tcg_gen_shli_tl(t3, t3, sft4);
}
tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
gen_store_mxu_gpr(t0, XRa);
gen_store_mxu_gpr(t2, XRd);
}
/*
* Q16SLLV XRa, XRd, rs
* Quad 16-bit shift left from XRa and XRd to rs[3:0]
* bits. Store to XRa and XRd respectively.
* Q16SLRV XRa, XRd, rs
* Quad 16-bit shift logic right from XRa and XRd to rs[3:0]
* bits. Store to XRa and XRd respectively.
* Q16SARV XRa, XRd, rs
* Quad 16-bit shift arithmetic right from XRa and XRd to rs[3:0]
* bits. Store to XRa and XRd respectively.
*/
static void gen_mxu_q16sxxv(DisasContext *ctx, bool right, bool arithmetic)
{
uint32_t XRa, XRd, rs;
XRa = extract32(ctx->opcode, 10, 4);
XRd = extract32(ctx->opcode, 14, 4);
rs = extract32(ctx->opcode, 21, 5);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t5 = tcg_temp_new();
gen_load_mxu_gpr(t0, XRa);
gen_load_mxu_gpr(t2, XRd);
gen_load_gpr(t5, rs);
tcg_gen_andi_tl(t5, t5, 0x0f);
if (arithmetic) {
tcg_gen_sextract_tl(t1, t0, 16, 16);
tcg_gen_sextract_tl(t0, t0, 0, 16);
tcg_gen_sextract_tl(t3, t2, 16, 16);
tcg_gen_sextract_tl(t2, t2, 0, 16);
} else {
tcg_gen_extract_tl(t1, t0, 16, 16);
tcg_gen_extract_tl(t0, t0, 0, 16);
tcg_gen_extract_tl(t3, t2, 16, 16);
tcg_gen_extract_tl(t2, t2, 0, 16);
}
if (right) {
if (arithmetic) {
tcg_gen_sar_tl(t0, t0, t5);
tcg_gen_sar_tl(t1, t1, t5);
tcg_gen_sar_tl(t2, t2, t5);
tcg_gen_sar_tl(t3, t3, t5);
} else {
tcg_gen_shr_tl(t0, t0, t5);
tcg_gen_shr_tl(t1, t1, t5);
tcg_gen_shr_tl(t2, t2, t5);
tcg_gen_shr_tl(t3, t3, t5);
}
} else {
tcg_gen_shl_tl(t0, t0, t5);
tcg_gen_shl_tl(t1, t1, t5);
tcg_gen_shl_tl(t2, t2, t5);
tcg_gen_shl_tl(t3, t3, t5);
}
tcg_gen_deposit_tl(t0, t0, t1, 16, 16);
tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
gen_store_mxu_gpr(t0, XRa);
gen_store_mxu_gpr(t2, XRd);
}
/*
* MXU instruction category max/min/avg
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32MAX D16MAX Q8MAX
* S32MIN D16MIN Q8MIN
* S32SLT D16SLT Q8SLT
* Q8SLTU
* D16AVG Q8AVG
* D16AVGR Q8AVGR
* S32MOVZ D16MOVZ Q8MOVZ
* S32MOVN D16MOVN Q8MOVN
*/
/*
* S32MAX XRa, XRb, XRc
* Update XRa with the maximum of signed 32-bit integers contained
* in XRb and XRc.
*
* S32MIN XRa, XRb, XRc
* Update XRa with the minimum of signed 32-bit integers contained
* in XRb and XRc.
*/
static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
{
uint32_t pad, opc, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
opc = extract32(ctx->opcode, 18, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely((XRb == 0) || (XRc == 0))) {
/* exactly one operand is zero register - find which one is not...*/
uint32_t XRx = XRb ? XRb : XRc;
/* ...and do max/min operation with one operand 0 */
if (opc == OPC_MXU_S32MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
}
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
if (opc == OPC_MXU_S32MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
mxu_gpr[XRc - 1]);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
mxu_gpr[XRc - 1]);
}
}
}
/*
* D16MAX
* Update XRa with the 16-bit-wise maximums of signed integers
* contained in XRb and XRc.
*
* D16MIN
* Update XRa with the 16-bit-wise minimums of signed integers
* contained in XRb and XRc.
*/
static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
{
uint32_t pad, opc, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
opc = extract32(ctx->opcode, 18, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely((XRb == 0) || (XRc == 0))) {
/* exactly one operand is zero register - find which one is not...*/
uint32_t XRx = XRb ? XRb : XRc;
/* ...and do half-word-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_constant_i32(0);
TCGv_i32 t2 = tcg_temp_new();
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(t2, t0, t1);
} else {
tcg_gen_smin_i32(t2, t0, t1);
}
/* the right half-word */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
/* move half-words to the leftmost position */
tcg_gen_shli_i32(t0, t0, 16);
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
TCGv_i32 t2 = tcg_temp_new();
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(t2, t0, t1);
} else {
tcg_gen_smin_i32(t2, t0, t1);
}
/* the right half-word */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
/* move half-words to the leftmost position */
tcg_gen_shli_i32(t0, t0, 16);
tcg_gen_shli_i32(t1, t1, 16);
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
tcg_gen_or_i32(mxu_gpr[XRa - 1], t2, t0);
}
}
/*
* Q8MAX
* Update XRa with the 8-bit-wise maximums of signed integers
* contained in XRb and XRc.
*
* Q8MIN
* Update XRa with the 8-bit-wise minimums of signed integers
* contained in XRb and XRc.
*/
static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
{
uint32_t pad, opc, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
opc = extract32(ctx->opcode, 18, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely((XRb == 0) || (XRc == 0))) {
/* exactly one operand is zero register - make it be the first...*/
uint32_t XRx = XRb ? XRb : XRc;
/* ...and do byte-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_constant_i32(0);
TCGv_i32 t2 = tcg_temp_new();
int32_t i;
/* the leftmost byte (byte 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(t2, t0, t1);
} else {
tcg_gen_smin_i32(t2, t0, t1);
}
/* bytes 2, 1, 0 */
for (i = 2; i >= 0; i--) {
/* extract the byte */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
/* move the byte to the leftmost position */
tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
tcg_gen_or_i32(t2, t2, t0);
}
gen_store_mxu_gpr(t2, XRa);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
TCGv_i32 t2 = tcg_temp_new();
int32_t i;
/* the leftmost bytes (bytes 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(t2, t0, t1);
} else {
tcg_gen_smin_i32(t2, t0, t1);
}
/* bytes 2, 1, 0 */
for (i = 2; i >= 0; i--) {
/* extract corresponding bytes */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
/* move the bytes to the leftmost position */
tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
tcg_gen_or_i32(t2, t2, t0);
}
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* Q8SLT
* Update XRa with the signed "set less than" comparison of XRb and XRc
* on per-byte basis.
* a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
*
* Q8SLTU
* Update XRa with the unsigned "set less than" comparison of XRb and XRc
* on per-byte basis.
* a.k.a. XRa[0..3] = XRb[0..3] < XRc[0..3] ? 1 : 0;
*/
static void gen_mxu_q8slt(DisasContext *ctx, bool sltu)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_movi_tl(t2, 0);
for (int i = 0; i < 4; i++) {
if (sltu) {
tcg_gen_extract_tl(t0, t3, 8 * i, 8);
tcg_gen_extract_tl(t1, t4, 8 * i, 8);
} else {
tcg_gen_sextract_tl(t0, t3, 8 * i, 8);
tcg_gen_sextract_tl(t1, t4, 8 * i, 8);
}
tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
}
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* S32SLT
* Update XRa with the signed "set less than" comparison of XRb and XRc.
* a.k.a. XRa = XRb < XRc ? 1 : 0;
*/
static void gen_mxu_S32SLT(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
tcg_gen_setcond_tl(TCG_COND_LT, mxu_gpr[XRa - 1], t0, t1);
}
}
/*
* D16SLT
* Update XRa with the signed "set less than" comparison of XRb and XRc
* on per-word basis.
* a.k.a. XRa[0..1] = XRb[0..1] < XRc[0..1] ? 1 : 0;
*/
static void gen_mxu_D16SLT(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_sextract_tl(t0, t3, 16, 16);
tcg_gen_sextract_tl(t1, t4, 16, 16);
tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
tcg_gen_shli_tl(t2, t0, 16);
tcg_gen_sextract_tl(t0, t3, 0, 16);
tcg_gen_sextract_tl(t1, t4, 0, 16);
tcg_gen_setcond_tl(TCG_COND_LT, t0, t0, t1);
tcg_gen_or_tl(mxu_gpr[XRa - 1], t2, t0);
}
}
/*
* D16AVG
* Update XRa with the signed average of XRb and XRc
* on per-word basis, rounding down.
* a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1]) >> 1;
*
* D16AVGR
* Update XRa with the signed average of XRb and XRc
* on per-word basis, math rounding 4/5.
* a.k.a. XRa[0..1] = (XRb[0..1] + XRc[0..1] + 1) >> 1;
*/
static void gen_mxu_d16avg(DisasContext *ctx, bool round45)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to same */
tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_sextract_tl(t0, t3, 16, 16);
tcg_gen_sextract_tl(t1, t4, 16, 16);
tcg_gen_add_tl(t0, t0, t1);
if (round45) {
tcg_gen_addi_tl(t0, t0, 1);
}
tcg_gen_shli_tl(t2, t0, 15);
tcg_gen_andi_tl(t2, t2, 0xffff0000);
tcg_gen_sextract_tl(t0, t3, 0, 16);
tcg_gen_sextract_tl(t1, t4, 0, 16);
tcg_gen_add_tl(t0, t0, t1);
if (round45) {
tcg_gen_addi_tl(t0, t0, 1);
}
tcg_gen_shri_tl(t0, t0, 1);
tcg_gen_deposit_tl(t2, t2, t0, 0, 16);
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* Q8AVG
* Update XRa with the signed average of XRb and XRc
* on per-byte basis, rounding down.
* a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3]) >> 1;
*
* Q8AVGR
* Update XRa with the signed average of XRb and XRc
* on per-word basis, math rounding 4/5.
* a.k.a. XRa[0..3] = (XRb[0..3] + XRc[0..3] + 1) >> 1;
*/
static void gen_mxu_q8avg(DisasContext *ctx, bool round45)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same registers -> just set destination to same */
tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_movi_tl(t2, 0);
for (int i = 0; i < 4; i++) {
tcg_gen_extract_tl(t0, t3, 8 * i, 8);
tcg_gen_extract_tl(t1, t4, 8 * i, 8);
tcg_gen_add_tl(t0, t0, t1);
if (round45) {
tcg_gen_addi_tl(t0, t0, 1);
}
tcg_gen_shri_tl(t0, t0, 1);
tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
}
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* Q8MOVZ
* Quadruple 8-bit packed conditional move where
* XRb contains conditions, XRc what to move and
* XRa is the destination.
* a.k.a. if (XRb[0..3] == 0) { XRa[0..3] = XRc[0..3] }
*
* Q8MOVN
* Quadruple 8-bit packed conditional move where
* XRb contains conditions, XRc what to move and
* XRa is the destination.
* a.k.a. if (XRb[0..3] != 0) { XRa[0..3] = XRc[0..3] }
*/
static void gen_mxu_q8movzn(DisasContext *ctx, TCGCond cond)
{
uint32_t XRc, XRb, XRa;
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGLabel *l_quarterdone = gen_new_label();
TCGLabel *l_halfdone = gen_new_label();
TCGLabel *l_quarterrest = gen_new_label();
TCGLabel *l_done = gen_new_label();
gen_load_mxu_gpr(t0, XRc);
gen_load_mxu_gpr(t1, XRb);
gen_load_mxu_gpr(t2, XRa);
tcg_gen_extract_tl(t3, t1, 24, 8);
tcg_gen_brcondi_tl(cond, t3, 0, l_quarterdone);
tcg_gen_extract_tl(t3, t0, 24, 8);
tcg_gen_deposit_tl(t2, t2, t3, 24, 8);
gen_set_label(l_quarterdone);
tcg_gen_extract_tl(t3, t1, 16, 8);
tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
tcg_gen_extract_tl(t3, t0, 16, 8);
tcg_gen_deposit_tl(t2, t2, t3, 16, 8);
gen_set_label(l_halfdone);
tcg_gen_extract_tl(t3, t1, 8, 8);
tcg_gen_brcondi_tl(cond, t3, 0, l_quarterrest);
tcg_gen_extract_tl(t3, t0, 8, 8);
tcg_gen_deposit_tl(t2, t2, t3, 8, 8);
gen_set_label(l_quarterrest);
tcg_gen_extract_tl(t3, t1, 0, 8);
tcg_gen_brcondi_tl(cond, t3, 0, l_done);
tcg_gen_extract_tl(t3, t0, 0, 8);
tcg_gen_deposit_tl(t2, t2, t3, 0, 8);
gen_set_label(l_done);
gen_store_mxu_gpr(t2, XRa);
}
/*
* D16MOVZ
* Double 16-bit packed conditional move where
* XRb contains conditions, XRc what to move and
* XRa is the destination.
* a.k.a. if (XRb[0..1] == 0) { XRa[0..1] = XRc[0..1] }
*
* D16MOVN
* Double 16-bit packed conditional move where
* XRb contains conditions, XRc what to move and
* XRa is the destination.
* a.k.a. if (XRb[0..3] != 0) { XRa[0..1] = XRc[0..1] }
*/
static void gen_mxu_d16movzn(DisasContext *ctx, TCGCond cond)
{
uint32_t XRc, XRb, XRa;
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGLabel *l_halfdone = gen_new_label();
TCGLabel *l_done = gen_new_label();
gen_load_mxu_gpr(t0, XRc);
gen_load_mxu_gpr(t1, XRb);
gen_load_mxu_gpr(t2, XRa);
tcg_gen_extract_tl(t3, t1, 16, 16);
tcg_gen_brcondi_tl(cond, t3, 0, l_halfdone);
tcg_gen_extract_tl(t3, t0, 16, 16);
tcg_gen_deposit_tl(t2, t2, t3, 16, 16);
gen_set_label(l_halfdone);
tcg_gen_extract_tl(t3, t1, 0, 16);
tcg_gen_brcondi_tl(cond, t3, 0, l_done);
tcg_gen_extract_tl(t3, t0, 0, 16);
tcg_gen_deposit_tl(t2, t2, t3, 0, 16);
gen_set_label(l_done);
gen_store_mxu_gpr(t2, XRa);
}
/*
* S32MOVZ
* Quadruple 32-bit conditional move where
* XRb contains conditions, XRc what to move and
* XRa is the destination.
* a.k.a. if (XRb == 0) { XRa = XRc }
*
* S32MOVN
* Single 32-bit conditional move where
* XRb contains conditions, XRc what to move and
* XRa is the destination.
* a.k.a. if (XRb != 0) { XRa = XRc }
*/
static void gen_mxu_s32movzn(DisasContext *ctx, TCGCond cond)
{
uint32_t XRc, XRb, XRa;
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGLabel *l_done = gen_new_label();
gen_load_mxu_gpr(t0, XRc);
gen_load_mxu_gpr(t1, XRb);
tcg_gen_brcondi_tl(cond, t1, 0, l_done);
gen_store_mxu_gpr(t0, XRa);
gen_set_label(l_done);
}
/*
* MXU instruction category: Addition and subtraction
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32CPS D16CPS
* Q8ADD
*/
/*
* S32CPS
* Update XRa if XRc < 0 by value of 0 - XRb
* else XRa = XRb
*/
static void gen_mxu_S32CPS(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely(XRb == 0)) {
/* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRc == 0)) {
/* condition always false -> just move XRb to XRa */
tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGLabel *l_not_less = gen_new_label();
TCGLabel *l_done = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_GE, mxu_gpr[XRc - 1], 0, l_not_less);
tcg_gen_neg_tl(t0, mxu_gpr[XRb - 1]);
tcg_gen_br(l_done);
gen_set_label(l_not_less);
gen_load_mxu_gpr(t0, XRb);
gen_set_label(l_done);
gen_store_mxu_gpr(t0, XRa);
}
}
/*
* D16CPS
* Update XRa[0..1] if XRc[0..1] < 0 by value of 0 - XRb[0..1]
* else XRa[0..1] = XRb[0..1]
*/
static void gen_mxu_D16CPS(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely(XRb == 0)) {
/* XRc make no sense 0 - 0 = 0 -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRc == 0)) {
/* condition always false -> just move XRb to XRa */
tcg_gen_mov_tl(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGLabel *l_done_hi = gen_new_label();
TCGLabel *l_not_less_lo = gen_new_label();
TCGLabel *l_done_lo = gen_new_label();
tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 16, 16);
tcg_gen_sextract_tl(t1, mxu_gpr[XRb - 1], 16, 16);
tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_done_hi);
tcg_gen_subfi_tl(t1, 0, t1);
gen_set_label(l_done_hi);
tcg_gen_shli_i32(t1, t1, 16);
tcg_gen_sextract_tl(t0, mxu_gpr[XRc - 1], 0, 16);
tcg_gen_brcondi_tl(TCG_COND_GE, t0, 0, l_not_less_lo);
tcg_gen_sextract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
tcg_gen_subfi_tl(t0, 0, t0);
tcg_gen_br(l_done_lo);
gen_set_label(l_not_less_lo);
tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 16);
gen_set_label(l_done_lo);
tcg_gen_deposit_tl(mxu_gpr[XRa - 1], t1, t0, 0, 16);
}
}
/*
* Q8ABD XRa, XRb, XRc
* Gets absolute difference for quadruple of 8-bit
* packed in XRb to another one in XRc,
* put the result in XRa.
* a.k.a. XRa[0..3] = abs(XRb[0..3] - XRc[0..3]);
*/
static void gen_mxu_Q8ABD(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
tcg_gen_movi_tl(t2, 0);
for (int i = 0; i < 4; i++) {
tcg_gen_extract_tl(t0, t3, 8 * i, 8);
tcg_gen_extract_tl(t1, t4, 8 * i, 8);
tcg_gen_sub_tl(t0, t0, t1);
tcg_gen_abs_tl(t0, t0);
tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
}
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* Q8ADD XRa, XRb, XRc, ptn2
* Add/subtract quadruple of 8-bit packed in XRb
* to another one in XRc, put the result in XRa.
*/
static void gen_mxu_Q8ADD(DisasContext *ctx)
{
uint32_t aptn2, pad, XRc, XRb, XRa;
aptn2 = extract32(ctx->opcode, 24, 2);
pad = extract32(ctx->opcode, 21, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t4, XRc);
for (int i = 0; i < 4; i++) {
tcg_gen_andi_tl(t0, t3, 0xff);
tcg_gen_andi_tl(t1, t4, 0xff);
if (i < 2) {
if (aptn2 & 0x01) {
tcg_gen_sub_tl(t0, t0, t1);
} else {
tcg_gen_add_tl(t0, t0, t1);
}
} else {
if (aptn2 & 0x02) {
tcg_gen_sub_tl(t0, t0, t1);
} else {
tcg_gen_add_tl(t0, t0, t1);
}
}
if (i < 3) {
tcg_gen_shri_tl(t3, t3, 8);
tcg_gen_shri_tl(t4, t4, 8);
}
if (i > 0) {
tcg_gen_deposit_tl(t2, t2, t0, 8 * i, 8);
} else {
tcg_gen_andi_tl(t0, t0, 0xff);
tcg_gen_mov_tl(t2, t0);
}
}
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* Q8ADDE XRa, XRb, XRc, XRd, aptn2
* Add/subtract quadruple of 8-bit packed in XRb
* to another one in XRc, with zero extending
* to 16-bit and put results as packed 16-bit data
* into XRa and XRd.
* aptn2 manages action add or subtract of pairs of data.
*
* Q8ACCE XRa, XRb, XRc, XRd, aptn2
* Add/subtract quadruple of 8-bit packed in XRb
* to another one in XRc, with zero extending
* to 16-bit and accumulate results as packed 16-bit data
* into XRa and XRd.
* aptn2 manages action add or subtract of pairs of data.
*/
static void gen_mxu_q8adde(DisasContext *ctx, bool accumulate)
{
uint32_t aptn2, XRd, XRc, XRb, XRa;
aptn2 = extract32(ctx->opcode, 24, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
if (XRa != 0) {
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
}
if (XRd != 0) {
tcg_gen_movi_tl(mxu_gpr[XRd - 1], 0);
}
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
TCGv t5 = tcg_temp_new();
if (XRa != 0) {
gen_extract_mxu_gpr(t0, XRb, 16, 8);
gen_extract_mxu_gpr(t1, XRc, 16, 8);
gen_extract_mxu_gpr(t2, XRb, 24, 8);
gen_extract_mxu_gpr(t3, XRc, 24, 8);
if (aptn2 & 2) {
tcg_gen_sub_tl(t0, t0, t1);
tcg_gen_sub_tl(t2, t2, t3);
} else {
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_add_tl(t2, t2, t3);
}
if (accumulate) {
gen_load_mxu_gpr(t5, XRa);
tcg_gen_extract_tl(t1, t5, 0, 16);
tcg_gen_extract_tl(t3, t5, 16, 16);
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_add_tl(t2, t2, t3);
}
tcg_gen_shli_tl(t2, t2, 16);
tcg_gen_extract_tl(t0, t0, 0, 16);
tcg_gen_or_tl(t4, t2, t0);
}
if (XRd != 0) {
gen_extract_mxu_gpr(t0, XRb, 0, 8);
gen_extract_mxu_gpr(t1, XRc, 0, 8);
gen_extract_mxu_gpr(t2, XRb, 8, 8);
gen_extract_mxu_gpr(t3, XRc, 8, 8);
if (aptn2 & 1) {
tcg_gen_sub_tl(t0, t0, t1);
tcg_gen_sub_tl(t2, t2, t3);
} else {
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_add_tl(t2, t2, t3);
}
if (accumulate) {
gen_load_mxu_gpr(t5, XRd);
tcg_gen_extract_tl(t1, t5, 0, 16);
tcg_gen_extract_tl(t3, t5, 16, 16);
tcg_gen_add_tl(t0, t0, t1);
tcg_gen_add_tl(t2, t2, t3);
}
tcg_gen_shli_tl(t2, t2, 16);
tcg_gen_extract_tl(t0, t0, 0, 16);
tcg_gen_or_tl(t5, t2, t0);
}
gen_store_mxu_gpr(t4, XRa);
gen_store_mxu_gpr(t5, XRd);
}
}
/*
* D8SUM XRa, XRb, XRc
* Double parallel add of quadruple unsigned 8-bit together
* with zero extending to 16-bit data.
* D8SUMC XRa, XRb, XRc
* Double parallel add of quadruple unsigned 8-bit together
* with zero extending to 16-bit data and adding 2 to each
* parallel result.
*/
static void gen_mxu_d8sum(DisasContext *ctx, bool sumc)
{
uint32_t pad, pad2, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 24, 2);
pad2 = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0 || pad2 != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
TCGv t5 = tcg_temp_new();
if (XRb != 0) {
tcg_gen_extract_tl(t0, mxu_gpr[XRb - 1], 0, 8);
tcg_gen_extract_tl(t1, mxu_gpr[XRb - 1], 8, 8);
tcg_gen_extract_tl(t2, mxu_gpr[XRb - 1], 16, 8);
tcg_gen_extract_tl(t3, mxu_gpr[XRb - 1], 24, 8);
tcg_gen_add_tl(t4, t0, t1);
tcg_gen_add_tl(t4, t4, t2);
tcg_gen_add_tl(t4, t4, t3);
} else {
tcg_gen_mov_tl(t4, 0);
}
if (XRc != 0) {
tcg_gen_extract_tl(t0, mxu_gpr[XRc - 1], 0, 8);
tcg_gen_extract_tl(t1, mxu_gpr[XRc - 1], 8, 8);
tcg_gen_extract_tl(t2, mxu_gpr[XRc - 1], 16, 8);
tcg_gen_extract_tl(t3, mxu_gpr[XRc - 1], 24, 8);
tcg_gen_add_tl(t5, t0, t1);
tcg_gen_add_tl(t5, t5, t2);
tcg_gen_add_tl(t5, t5, t3);
} else {
tcg_gen_mov_tl(t5, 0);
}
if (sumc) {
tcg_gen_addi_tl(t4, t4, 2);
tcg_gen_addi_tl(t5, t5, 2);
}
tcg_gen_shli_tl(t4, t4, 16);
tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
}
}
/*
* Q16ADD XRa, XRb, XRc, XRd, aptn2, optn2 - Quad packed
* 16-bit pattern addition.
*/
static void gen_mxu_q16add(DisasContext *ctx)
{
uint32_t aptn2, optn2, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
optn2 = extract32(ctx->opcode, 22, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
TCGv t5 = tcg_temp_new();
gen_load_mxu_gpr(t1, XRb);
tcg_gen_extract_tl(t0, t1, 0, 16);
tcg_gen_extract_tl(t1, t1, 16, 16);
gen_load_mxu_gpr(t3, XRc);
tcg_gen_extract_tl(t2, t3, 0, 16);
tcg_gen_extract_tl(t3, t3, 16, 16);
switch (optn2) {
case MXU_OPTN2_WW: /* XRB.H+XRC.H == lop, XRB.L+XRC.L == rop */
tcg_gen_mov_tl(t4, t1);
tcg_gen_mov_tl(t5, t0);
break;
case MXU_OPTN2_LW: /* XRB.L+XRC.H == lop, XRB.L+XRC.L == rop */
tcg_gen_mov_tl(t4, t0);
tcg_gen_mov_tl(t5, t0);
break;
case MXU_OPTN2_HW: /* XRB.H+XRC.H == lop, XRB.H+XRC.L == rop */
tcg_gen_mov_tl(t4, t1);
tcg_gen_mov_tl(t5, t1);
break;
case MXU_OPTN2_XW: /* XRB.L+XRC.H == lop, XRB.H+XRC.L == rop */
tcg_gen_mov_tl(t4, t0);
tcg_gen_mov_tl(t5, t1);
break;
}
switch (aptn2) {
case MXU_APTN2_AA: /* lop +, rop + */
tcg_gen_add_tl(t0, t4, t3);
tcg_gen_add_tl(t1, t5, t2);
tcg_gen_add_tl(t4, t4, t3);
tcg_gen_add_tl(t5, t5, t2);
break;
case MXU_APTN2_AS: /* lop +, rop + */
tcg_gen_sub_tl(t0, t4, t3);
tcg_gen_sub_tl(t1, t5, t2);
tcg_gen_add_tl(t4, t4, t3);
tcg_gen_add_tl(t5, t5, t2);
break;
case MXU_APTN2_SA: /* lop +, rop + */
tcg_gen_add_tl(t0, t4, t3);
tcg_gen_add_tl(t1, t5, t2);
tcg_gen_sub_tl(t4, t4, t3);
tcg_gen_sub_tl(t5, t5, t2);
break;
case MXU_APTN2_SS: /* lop +, rop + */
tcg_gen_sub_tl(t0, t4, t3);
tcg_gen_sub_tl(t1, t5, t2);
tcg_gen_sub_tl(t4, t4, t3);
tcg_gen_sub_tl(t5, t5, t2);
break;
}
tcg_gen_shli_tl(t0, t0, 16);
tcg_gen_extract_tl(t1, t1, 0, 16);
tcg_gen_shli_tl(t4, t4, 16);
tcg_gen_extract_tl(t5, t5, 0, 16);
tcg_gen_or_tl(mxu_gpr[XRa - 1], t4, t5);
tcg_gen_or_tl(mxu_gpr[XRd - 1], t0, t1);
}
/*
* Q16ACC XRa, XRb, XRc, XRd, aptn2 - Quad packed
* 16-bit addition/subtraction with accumulate.
*/
static void gen_mxu_q16acc(DisasContext *ctx)
{
uint32_t aptn2, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv s3 = tcg_temp_new();
TCGv s2 = tcg_temp_new();
TCGv s1 = tcg_temp_new();
TCGv s0 = tcg_temp_new();
gen_load_mxu_gpr(t1, XRb);
tcg_gen_extract_tl(t0, t1, 0, 16);
tcg_gen_extract_tl(t1, t1, 16, 16);
gen_load_mxu_gpr(t3, XRc);
tcg_gen_extract_tl(t2, t3, 0, 16);
tcg_gen_extract_tl(t3, t3, 16, 16);
switch (aptn2) {
case MXU_APTN2_AA: /* lop +, rop + */
tcg_gen_add_tl(s3, t1, t3);
tcg_gen_add_tl(s2, t0, t2);
tcg_gen_add_tl(s1, t1, t3);
tcg_gen_add_tl(s0, t0, t2);
break;
case MXU_APTN2_AS: /* lop +, rop - */
tcg_gen_sub_tl(s3, t1, t3);
tcg_gen_sub_tl(s2, t0, t2);
tcg_gen_add_tl(s1, t1, t3);
tcg_gen_add_tl(s0, t0, t2);
break;
case MXU_APTN2_SA: /* lop -, rop + */
tcg_gen_add_tl(s3, t1, t3);
tcg_gen_add_tl(s2, t0, t2);
tcg_gen_sub_tl(s1, t1, t3);
tcg_gen_sub_tl(s0, t0, t2);
break;
case MXU_APTN2_SS: /* lop -, rop - */
tcg_gen_sub_tl(s3, t1, t3);
tcg_gen_sub_tl(s2, t0, t2);
tcg_gen_sub_tl(s1, t1, t3);
tcg_gen_sub_tl(s0, t0, t2);
break;
}
if (XRa != 0) {
tcg_gen_add_tl(t0, mxu_gpr[XRa - 1], s0);
tcg_gen_extract_tl(t0, t0, 0, 16);
tcg_gen_extract_tl(t1, mxu_gpr[XRa - 1], 16, 16);
tcg_gen_add_tl(t1, t1, s1);
tcg_gen_shli_tl(t1, t1, 16);
tcg_gen_or_tl(mxu_gpr[XRa - 1], t1, t0);
}
if (XRd != 0) {
tcg_gen_add_tl(t0, mxu_gpr[XRd - 1], s2);
tcg_gen_extract_tl(t0, t0, 0, 16);
tcg_gen_extract_tl(t1, mxu_gpr[XRd - 1], 16, 16);
tcg_gen_add_tl(t1, t1, s3);
tcg_gen_shli_tl(t1, t1, 16);
tcg_gen_or_tl(mxu_gpr[XRd - 1], t1, t0);
}
}
/*
* Q16ACCM XRa, XRb, XRc, XRd, aptn2 - Quad packed
* 16-bit accumulate.
*/
static void gen_mxu_q16accm(DisasContext *ctx)
{
uint32_t aptn2, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
gen_load_mxu_gpr(t2, XRb);
gen_load_mxu_gpr(t3, XRc);
if (XRa != 0) {
TCGv a0 = tcg_temp_new();
TCGv a1 = tcg_temp_new();
tcg_gen_extract_tl(t0, t2, 0, 16);
tcg_gen_extract_tl(t1, t2, 16, 16);
gen_load_mxu_gpr(a1, XRa);
tcg_gen_extract_tl(a0, a1, 0, 16);
tcg_gen_extract_tl(a1, a1, 16, 16);
if (aptn2 & 2) {
tcg_gen_sub_tl(a0, a0, t0);
tcg_gen_sub_tl(a1, a1, t1);
} else {
tcg_gen_add_tl(a0, a0, t0);
tcg_gen_add_tl(a1, a1, t1);
}
tcg_gen_extract_tl(a0, a0, 0, 16);
tcg_gen_shli_tl(a1, a1, 16);
tcg_gen_or_tl(mxu_gpr[XRa - 1], a1, a0);
}
if (XRd != 0) {
TCGv a0 = tcg_temp_new();
TCGv a1 = tcg_temp_new();
tcg_gen_extract_tl(t0, t3, 0, 16);
tcg_gen_extract_tl(t1, t3, 16, 16);
gen_load_mxu_gpr(a1, XRd);
tcg_gen_extract_tl(a0, a1, 0, 16);
tcg_gen_extract_tl(a1, a1, 16, 16);
if (aptn2 & 1) {
tcg_gen_sub_tl(a0, a0, t0);
tcg_gen_sub_tl(a1, a1, t1);
} else {
tcg_gen_add_tl(a0, a0, t0);
tcg_gen_add_tl(a1, a1, t1);
}
tcg_gen_extract_tl(a0, a0, 0, 16);
tcg_gen_shli_tl(a1, a1, 16);
tcg_gen_or_tl(mxu_gpr[XRd - 1], a1, a0);
}
}
/*
* D16ASUM XRa, XRb, XRc, XRd, aptn2 - Double packed
* 16-bit sign extended addition and accumulate.
*/
static void gen_mxu_d16asum(DisasContext *ctx)
{
uint32_t aptn2, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
gen_load_mxu_gpr(t2, XRb);
gen_load_mxu_gpr(t3, XRc);
if (XRa != 0) {
tcg_gen_sextract_tl(t0, t2, 0, 16);
tcg_gen_sextract_tl(t1, t2, 16, 16);
tcg_gen_add_tl(t0, t0, t1);
if (aptn2 & 2) {
tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
} else {
tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
}
}
if (XRd != 0) {
tcg_gen_sextract_tl(t0, t3, 0, 16);
tcg_gen_sextract_tl(t1, t3, 16, 16);
tcg_gen_add_tl(t0, t0, t1);
if (aptn2 & 1) {
tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
} else {
tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t0);
}
}
}
/*
* D32ADD XRa, XRb, XRc, XRd, aptn2 - Double
* 32 bit pattern addition/subtraction, set carry.
*
* D32ADDC XRa, XRb, XRc, XRd, aptn2 - Double
* 32 bit pattern addition/subtraction with carry.
*/
static void gen_mxu_d32add(DisasContext *ctx)
{
uint32_t aptn2, addc, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
addc = extract32(ctx->opcode, 22, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv cr = tcg_temp_new();
if (unlikely(addc > 1)) {
/* opcode incorrect -> do nothing */
} else if (addc == 1) {
if (unlikely(XRa == 0 && XRd == 0)) {
/* destinations are zero register -> do nothing */
} else {
/* FIXME ??? What if XRa == XRd ??? */
/* aptn2 is unused here */
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
gen_load_mxu_cr(cr);
if (XRa != 0) {
tcg_gen_extract_tl(t2, cr, 31, 1);
tcg_gen_add_tl(t0, t0, t2);
tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
}
if (XRd != 0) {
tcg_gen_extract_tl(t2, cr, 30, 1);
tcg_gen_add_tl(t1, t1, t2);
tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
}
}
} else if (unlikely(XRa == 0 && XRd == 0)) {
/* destinations are zero register -> do nothing */
} else {
/* common case */
/* FIXME ??? What if XRa == XRd ??? */
TCGv carry = tcg_temp_new();
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
gen_load_mxu_cr(cr);
if (XRa != 0) {
if (aptn2 & 2) {
tcg_gen_sub_i32(t2, t0, t1);
tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
} else {
tcg_gen_add_i32(t2, t0, t1);
tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
}
tcg_gen_andi_tl(cr, cr, 0x7fffffff);
tcg_gen_shli_tl(carry, carry, 31);
tcg_gen_or_tl(cr, cr, carry);
gen_store_mxu_gpr(t2, XRa);
}
if (XRd != 0) {
if (aptn2 & 1) {
tcg_gen_sub_i32(t2, t0, t1);
tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t1);
} else {
tcg_gen_add_i32(t2, t0, t1);
tcg_gen_setcond_tl(TCG_COND_GTU, carry, t0, t2);
}
tcg_gen_andi_tl(cr, cr, 0xbfffffff);
tcg_gen_shli_tl(carry, carry, 30);
tcg_gen_or_tl(cr, cr, carry);
gen_store_mxu_gpr(t2, XRd);
}
gen_store_mxu_cr(cr);
}
}
/*
* D32ACC XRa, XRb, XRc, XRd, aptn2 - Double
* 32 bit pattern addition/subtraction and accumulate.
*/
static void gen_mxu_d32acc(DisasContext *ctx)
{
uint32_t aptn2, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
if (unlikely(XRa == 0 && XRd == 0)) {
/* destinations are zero register -> do nothing */
} else {
/* common case */
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
if (XRa != 0) {
if (aptn2 & 2) {
tcg_gen_sub_tl(t2, t0, t1);
} else {
tcg_gen_add_tl(t2, t0, t1);
}
tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
}
if (XRd != 0) {
if (aptn2 & 1) {
tcg_gen_sub_tl(t2, t0, t1);
} else {
tcg_gen_add_tl(t2, t0, t1);
}
tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
}
}
}
/*
* D32ACCM XRa, XRb, XRc, XRd, aptn2 - Double
* 32 bit pattern addition/subtraction and accumulate.
*/
static void gen_mxu_d32accm(DisasContext *ctx)
{
uint32_t aptn2, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
if (unlikely(XRa == 0 && XRd == 0)) {
/* destinations are zero register -> do nothing */
} else {
/* common case */
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
if (XRa != 0) {
tcg_gen_add_tl(t2, t0, t1);
if (aptn2 & 2) {
tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
} else {
tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t2);
}
}
if (XRd != 0) {
tcg_gen_sub_tl(t2, t0, t1);
if (aptn2 & 1) {
tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
} else {
tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t2);
}
}
}
}
/*
* D32ASUM XRa, XRb, XRc, XRd, aptn2 - Double
* 32 bit pattern addition/subtraction.
*/
static void gen_mxu_d32asum(DisasContext *ctx)
{
uint32_t aptn2, XRc, XRb, XRa, XRd;
aptn2 = extract32(ctx->opcode, 24, 2);
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
if (unlikely(XRa == 0 && XRd == 0)) {
/* destinations are zero register -> do nothing */
} else {
/* common case */
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
if (XRa != 0) {
if (aptn2 & 2) {
tcg_gen_sub_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
} else {
tcg_gen_add_tl(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
}
}
if (XRd != 0) {
if (aptn2 & 1) {
tcg_gen_sub_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
} else {
tcg_gen_add_tl(mxu_gpr[XRd - 1], mxu_gpr[XRd - 1], t1);
}
}
}
}
/*
* MXU instruction category: Miscellaneous
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32EXTR S32LUI
* S32EXTRV
* Q16SAT
* Q16SCOP
*/
/*
* S32EXTR XRa, XRd, rs, bits5
* Extract bits5 bits from 64-bit pair {XRa:XRd}
* starting from rs[4:0] offset and put to the XRa.
*/
static void gen_mxu_s32extr(DisasContext *ctx)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRd, rs, bits5;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRd = extract32(ctx->opcode, 10, 4);
bits5 = extract32(ctx->opcode, 16, 5);
rs = extract32(ctx->opcode, 21, 5);
/* {tmp} = {XRa:XRd} >> (64 - rt - bits5); */
/* {XRa} = extract({tmp}, 0, bits5); */
if (bits5 > 0) {
TCGLabel *l_xra_only = gen_new_label();
TCGLabel *l_done = gen_new_label();
gen_load_mxu_gpr(t0, XRd);
gen_load_mxu_gpr(t1, XRa);
gen_load_gpr(t2, rs);
tcg_gen_andi_tl(t2, t2, 0x1f);
tcg_gen_subfi_tl(t2, 32, t2);
tcg_gen_brcondi_tl(TCG_COND_GE, t2, bits5, l_xra_only);
tcg_gen_subfi_tl(t2, bits5, t2);
tcg_gen_subfi_tl(t3, 32, t2);
tcg_gen_shr_tl(t0, t0, t3);
tcg_gen_shl_tl(t1, t1, t2);
tcg_gen_or_tl(t0, t0, t1);
tcg_gen_br(l_done);
gen_set_label(l_xra_only);
tcg_gen_subi_tl(t2, t2, bits5);
tcg_gen_shr_tl(t0, t1, t2);
gen_set_label(l_done);
tcg_gen_extract_tl(t0, t0, 0, bits5);
} else {
/* unspecified behavior but matches tests on real hardware*/
tcg_gen_movi_tl(t0, 0);
}
gen_store_mxu_gpr(t0, XRa);
}
/*
* S32EXTRV XRa, XRd, rs, rt
* Extract rt[4:0] bits from 64-bit pair {XRa:XRd}
* starting from rs[4:0] offset and put to the XRa.
*/
static void gen_mxu_s32extrv(DisasContext *ctx)
{
TCGv t0, t1, t2, t3, t4;
uint32_t XRa, XRd, rs, rt;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
t4 = tcg_temp_new();
TCGLabel *l_xra_only = gen_new_label();
TCGLabel *l_done = gen_new_label();
TCGLabel *l_zero = gen_new_label();
TCGLabel *l_extract = gen_new_label();
XRa = extract32(ctx->opcode, 6, 4);
XRd = extract32(ctx->opcode, 10, 4);
rt = extract32(ctx->opcode, 16, 5);
rs = extract32(ctx->opcode, 21, 5);
/* {tmp} = {XRa:XRd} >> (64 - rs - rt) */
gen_load_mxu_gpr(t0, XRd);
gen_load_mxu_gpr(t1, XRa);
gen_load_gpr(t2, rs);
gen_load_gpr(t4, rt);
tcg_gen_brcondi_tl(TCG_COND_EQ, t4, 0, l_zero);
tcg_gen_andi_tl(t2, t2, 0x1f);
tcg_gen_subfi_tl(t2, 32, t2);
tcg_gen_brcond_tl(TCG_COND_GE, t2, t4, l_xra_only);
tcg_gen_sub_tl(t2, t4, t2);
tcg_gen_subfi_tl(t3, 32, t2);
tcg_gen_shr_tl(t0, t0, t3);
tcg_gen_shl_tl(t1, t1, t2);
tcg_gen_or_tl(t0, t0, t1);
tcg_gen_br(l_extract);
gen_set_label(l_xra_only);
tcg_gen_sub_tl(t2, t2, t4);
tcg_gen_shr_tl(t0, t1, t2);
tcg_gen_br(l_extract);
/* unspecified behavior but matches tests on real hardware*/
gen_set_label(l_zero);
tcg_gen_movi_tl(t0, 0);
tcg_gen_br(l_done);
/* {XRa} = extract({tmp}, 0, rt) */
gen_set_label(l_extract);
tcg_gen_subfi_tl(t4, 32, t4);
tcg_gen_shl_tl(t0, t0, t4);
tcg_gen_shr_tl(t0, t0, t4);
gen_set_label(l_done);
gen_store_mxu_gpr(t0, XRa);
}
/*
* S32LUI XRa, S8, optn3
* Permutate the immediate S8 value to form a word
* to update XRa.
*/
static void gen_mxu_s32lui(DisasContext *ctx)
{
uint32_t XRa, s8, optn3, pad;
XRa = extract32(ctx->opcode, 6, 4);
s8 = extract32(ctx->opcode, 10, 8);
pad = extract32(ctx->opcode, 21, 2);
optn3 = extract32(ctx->opcode, 23, 3);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else {
uint32_t s16;
TCGv t0 = tcg_temp_new();
switch (optn3) {
case 0:
tcg_gen_movi_tl(t0, s8);
break;
case 1:
tcg_gen_movi_tl(t0, s8 << 8);
break;
case 2:
tcg_gen_movi_tl(t0, s8 << 16);
break;
case 3:
tcg_gen_movi_tl(t0, s8 << 24);
break;
case 4:
tcg_gen_movi_tl(t0, (s8 << 16) | s8);
break;
case 5:
tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 8));
break;
case 6:
s16 = (uint16_t)(int16_t)(int8_t)s8;
tcg_gen_movi_tl(t0, (s16 << 16) | s16);
break;
case 7:
tcg_gen_movi_tl(t0, (s8 << 24) | (s8 << 16) | (s8 << 8) | s8);
break;
}
gen_store_mxu_gpr(t0, XRa);
}
}
/*
* Q16SAT XRa, XRb, XRc
* Packs four 16-bit signed integers in XRb and XRc to
* four saturated unsigned 8-bit into XRa.
*
*/
static void gen_mxu_Q16SAT(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
tcg_gen_movi_tl(t2, 0);
if (XRb != 0) {
TCGLabel *l_less_hi = gen_new_label();
TCGLabel *l_less_lo = gen_new_label();
TCGLabel *l_lo = gen_new_label();
TCGLabel *l_greater_hi = gen_new_label();
TCGLabel *l_greater_lo = gen_new_label();
TCGLabel *l_done = gen_new_label();
tcg_gen_sari_tl(t0, mxu_gpr[XRb - 1], 16);
tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
tcg_gen_br(l_lo);
gen_set_label(l_less_hi);
tcg_gen_movi_tl(t0, 0);
tcg_gen_br(l_lo);
gen_set_label(l_greater_hi);
tcg_gen_movi_tl(t0, 255);
gen_set_label(l_lo);
tcg_gen_shli_tl(t1, mxu_gpr[XRb - 1], 16);
tcg_gen_sari_tl(t1, t1, 16);
tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
tcg_gen_br(l_done);
gen_set_label(l_less_lo);
tcg_gen_movi_tl(t1, 0);
tcg_gen_br(l_done);
gen_set_label(l_greater_lo);
tcg_gen_movi_tl(t1, 255);
gen_set_label(l_done);
tcg_gen_shli_tl(t2, t0, 24);
tcg_gen_shli_tl(t1, t1, 16);
tcg_gen_or_tl(t2, t2, t1);
}
if (XRc != 0) {
TCGLabel *l_less_hi = gen_new_label();
TCGLabel *l_less_lo = gen_new_label();
TCGLabel *l_lo = gen_new_label();
TCGLabel *l_greater_hi = gen_new_label();
TCGLabel *l_greater_lo = gen_new_label();
TCGLabel *l_done = gen_new_label();
tcg_gen_sari_tl(t0, mxu_gpr[XRc - 1], 16);
tcg_gen_brcondi_tl(TCG_COND_LT, t0, 0, l_less_hi);
tcg_gen_brcondi_tl(TCG_COND_GT, t0, 255, l_greater_hi);
tcg_gen_br(l_lo);
gen_set_label(l_less_hi);
tcg_gen_movi_tl(t0, 0);
tcg_gen_br(l_lo);
gen_set_label(l_greater_hi);
tcg_gen_movi_tl(t0, 255);
gen_set_label(l_lo);
tcg_gen_shli_tl(t1, mxu_gpr[XRc - 1], 16);
tcg_gen_sari_tl(t1, t1, 16);
tcg_gen_brcondi_tl(TCG_COND_LT, t1, 0, l_less_lo);
tcg_gen_brcondi_tl(TCG_COND_GT, t1, 255, l_greater_lo);
tcg_gen_br(l_done);
gen_set_label(l_less_lo);
tcg_gen_movi_tl(t1, 0);
tcg_gen_br(l_done);
gen_set_label(l_greater_lo);
tcg_gen_movi_tl(t1, 255);
gen_set_label(l_done);
tcg_gen_shli_tl(t0, t0, 8);
tcg_gen_or_tl(t2, t2, t0);
tcg_gen_or_tl(t2, t2, t1);
}
gen_store_mxu_gpr(t2, XRa);
}
}
/*
* Q16SCOP XRa, XRd, XRb, XRc
* Determine sign of quad packed 16-bit signed values
* in XRb and XRc put result in XRa and XRd respectively.
*/
static void gen_mxu_q16scop(DisasContext *ctx)
{
uint32_t XRd, XRc, XRb, XRa;
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
TCGLabel *l_b_hi_lt = gen_new_label();
TCGLabel *l_b_hi_gt = gen_new_label();
TCGLabel *l_b_lo = gen_new_label();
TCGLabel *l_b_lo_lt = gen_new_label();
TCGLabel *l_c_hi = gen_new_label();
TCGLabel *l_c_hi_lt = gen_new_label();
TCGLabel *l_c_hi_gt = gen_new_label();
TCGLabel *l_c_lo = gen_new_label();
TCGLabel *l_c_lo_lt = gen_new_label();
TCGLabel *l_done = gen_new_label();
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
tcg_gen_sextract_tl(t2, t0, 16, 16);
tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_hi_lt);
tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_b_hi_gt);
tcg_gen_movi_tl(t3, 0);
tcg_gen_br(l_b_lo);
gen_set_label(l_b_hi_lt);
tcg_gen_movi_tl(t3, 0xffff0000);
tcg_gen_br(l_b_lo);
gen_set_label(l_b_hi_gt);
tcg_gen_movi_tl(t3, 0x00010000);
gen_set_label(l_b_lo);
tcg_gen_sextract_tl(t2, t0, 0, 16);
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_c_hi);
tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_b_lo_lt);
tcg_gen_ori_tl(t3, t3, 0x00000001);
tcg_gen_br(l_c_hi);
gen_set_label(l_b_lo_lt);
tcg_gen_ori_tl(t3, t3, 0x0000ffff);
tcg_gen_br(l_c_hi);
gen_set_label(l_c_hi);
tcg_gen_sextract_tl(t2, t1, 16, 16);
tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_hi_lt);
tcg_gen_brcondi_tl(TCG_COND_GT, t2, 0, l_c_hi_gt);
tcg_gen_movi_tl(t4, 0);
tcg_gen_br(l_c_lo);
gen_set_label(l_c_hi_lt);
tcg_gen_movi_tl(t4, 0xffff0000);
tcg_gen_br(l_c_lo);
gen_set_label(l_c_hi_gt);
tcg_gen_movi_tl(t4, 0x00010000);
gen_set_label(l_c_lo);
tcg_gen_sextract_tl(t2, t1, 0, 16);
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_done);
tcg_gen_brcondi_tl(TCG_COND_LT, t2, 0, l_c_lo_lt);
tcg_gen_ori_tl(t4, t4, 0x00000001);
tcg_gen_br(l_done);
gen_set_label(l_c_lo_lt);
tcg_gen_ori_tl(t4, t4, 0x0000ffff);
gen_set_label(l_done);
gen_store_mxu_gpr(t3, XRa);
gen_store_mxu_gpr(t4, XRd);
}
/*
* S32SFL XRa, XRd, XRb, XRc
* Shuffle bytes according to one of four patterns.
*/
static void gen_mxu_s32sfl(DisasContext *ctx)
{
uint32_t XRd, XRc, XRb, XRa, ptn2;
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
ptn2 = extract32(ctx->opcode, 24, 2);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
switch (ptn2) {
case 0:
tcg_gen_andi_tl(t2, t0, 0xff000000);
tcg_gen_andi_tl(t3, t1, 0x000000ff);
tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
tcg_gen_shri_tl(t0, t0, 8);
tcg_gen_shri_tl(t1, t1, 8);
tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
tcg_gen_deposit_tl(t3, t3, t1, 16, 8);
tcg_gen_shri_tl(t0, t0, 8);
tcg_gen_shri_tl(t1, t1, 8);
tcg_gen_deposit_tl(t2, t2, t0, 8, 8);
tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
tcg_gen_shri_tl(t1, t1, 8);
tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
break;
case 1:
tcg_gen_andi_tl(t2, t0, 0xff000000);
tcg_gen_andi_tl(t3, t1, 0x000000ff);
tcg_gen_deposit_tl(t3, t3, t0, 16, 8);
tcg_gen_shri_tl(t0, t0, 8);
tcg_gen_shri_tl(t1, t1, 8);
tcg_gen_deposit_tl(t2, t2, t0, 16, 8);
tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
tcg_gen_shri_tl(t0, t0, 8);
tcg_gen_shri_tl(t1, t1, 8);
tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
tcg_gen_deposit_tl(t3, t3, t1, 8, 8);
tcg_gen_shri_tl(t1, t1, 8);
tcg_gen_deposit_tl(t2, t2, t1, 8, 8);
break;
case 2:
tcg_gen_andi_tl(t2, t0, 0xff00ff00);
tcg_gen_andi_tl(t3, t1, 0x00ff00ff);
tcg_gen_deposit_tl(t3, t3, t0, 8, 8);
tcg_gen_shri_tl(t0, t0, 16);
tcg_gen_shri_tl(t1, t1, 8);
tcg_gen_deposit_tl(t2, t2, t1, 0, 8);
tcg_gen_deposit_tl(t3, t3, t0, 24, 8);
tcg_gen_shri_tl(t1, t1, 16);
tcg_gen_deposit_tl(t2, t2, t1, 16, 8);
break;
case 3:
tcg_gen_andi_tl(t2, t0, 0xffff0000);
tcg_gen_andi_tl(t3, t1, 0x0000ffff);
tcg_gen_shri_tl(t1, t1, 16);
tcg_gen_deposit_tl(t2, t2, t1, 0, 16);
tcg_gen_deposit_tl(t3, t3, t0, 16, 16);
break;
}
gen_store_mxu_gpr(t2, XRa);
gen_store_mxu_gpr(t3, XRd);
}
/*
* Q8SAD XRa, XRd, XRb, XRc
* Typical SAD operation for motion estimation.
*/
static void gen_mxu_q8sad(DisasContext *ctx)
{
uint32_t XRd, XRc, XRb, XRa;
XRd = extract32(ctx->opcode, 18, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGv t4 = tcg_temp_new();
TCGv t5 = tcg_temp_new();
gen_load_mxu_gpr(t2, XRb);
gen_load_mxu_gpr(t3, XRc);
gen_load_mxu_gpr(t5, XRd);
tcg_gen_movi_tl(t4, 0);
for (int i = 0; i < 4; i++) {
tcg_gen_andi_tl(t0, t2, 0xff);
tcg_gen_andi_tl(t1, t3, 0xff);
tcg_gen_sub_tl(t0, t0, t1);
tcg_gen_abs_tl(t0, t0);
tcg_gen_add_tl(t4, t4, t0);
if (i < 3) {
tcg_gen_shri_tl(t2, t2, 8);
tcg_gen_shri_tl(t3, t3, 8);
}
}
tcg_gen_add_tl(t5, t5, t4);
gen_store_mxu_gpr(t4, XRa);
gen_store_mxu_gpr(t5, XRd);
}
/*
* MXU instruction category: align
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32ALN S32ALNI
*/
/*
* S32ALNI XRc, XRb, XRa, optn3
* Arrange bytes from XRb and XRc according to one of five sets of
* rules determined by optn3, and place the result in XRa.
*/
static void gen_mxu_S32ALNI(DisasContext *ctx)
{
uint32_t optn3, pad, XRc, XRb, XRa;
optn3 = extract32(ctx->opcode, 23, 3);
pad = extract32(ctx->opcode, 21, 2);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just appropriatelly shift XRc into XRa */
switch (optn3) {
case MXU_OPTN3_PTN0:
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
break;
case MXU_OPTN3_PTN1:
case MXU_OPTN3_PTN2:
case MXU_OPTN3_PTN3:
tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
8 * (4 - optn3));
break;
case MXU_OPTN3_PTN4:
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
break;
}
} else if (unlikely(XRc == 0)) {
/* XRc zero register -> just appropriatelly shift XRb into XRa */
switch (optn3) {
case MXU_OPTN3_PTN0:
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
break;
case MXU_OPTN3_PTN1:
case MXU_OPTN3_PTN2:
case MXU_OPTN3_PTN3:
tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
break;
case MXU_OPTN3_PTN4:
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
break;
}
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just rotation or moving from any of them */
switch (optn3) {
case MXU_OPTN3_PTN0:
case MXU_OPTN3_PTN4:
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
break;
case MXU_OPTN3_PTN1:
case MXU_OPTN3_PTN2:
case MXU_OPTN3_PTN3:
tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
break;
}
} else {
/* the most general case */
switch (optn3) {
case MXU_OPTN3_PTN0:
{
/* */
/* XRb XRc */
/* +---------------+ */
/* | A B C D | E F G H */
/* +-------+-------+ */
/* | */
/* XRa */
/* */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
}
break;
case MXU_OPTN3_PTN1:
{
/* */
/* XRb XRc */
/* +-------------------+ */
/* A | B C D E | F G H */
/* +---------+---------+ */
/* | */
/* XRa */
/* */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
tcg_gen_shli_i32(t0, t0, 8);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
tcg_gen_shri_i32(t1, t1, 24);
tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
}
break;
case MXU_OPTN3_PTN2:
{
/* */
/* XRb XRc */
/* +-------------------+ */
/* A B | C D E F | G H */
/* +---------+---------+ */
/* | */
/* XRa */
/* */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
tcg_gen_shli_i32(t0, t0, 16);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
tcg_gen_shri_i32(t1, t1, 16);
tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
}
break;
case MXU_OPTN3_PTN3:
{
/* */
/* XRb XRc */
/* +-------------------+ */
/* A B C | D E F G | H */
/* +---------+---------+ */
/* | */
/* XRa */
/* */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
tcg_gen_shli_i32(t0, t0, 24);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
tcg_gen_shri_i32(t1, t1, 8);
tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
}
break;
case MXU_OPTN3_PTN4:
{
/* */
/* XRb XRc */
/* +---------------+ */
/* A B C D | E F G H | */
/* +-------+-------+ */
/* | */
/* XRa */
/* */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
}
break;
}
}
}
/*
* S32ALN XRc, XRb, XRa, rs
* Arrange bytes from XRb and XRc according to one of five sets of
* rules determined by rs[2:0], and place the result in XRa.
*/
static void gen_mxu_S32ALN(DisasContext *ctx)
{
uint32_t rs, XRc, XRb, XRa;
rs = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 0s */
tcg_gen_movi_tl(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv t2 = tcg_temp_new();
TCGv t3 = tcg_temp_new();
TCGLabel *l_exit = gen_new_label();
TCGLabel *l_b_only = gen_new_label();
TCGLabel *l_c_only = gen_new_label();
gen_load_mxu_gpr(t0, XRb);
gen_load_mxu_gpr(t1, XRc);
gen_load_gpr(t2, rs);
tcg_gen_andi_tl(t2, t2, 0x07);
/* do nothing for undefined cases */
tcg_gen_brcondi_tl(TCG_COND_GE, t2, 5, l_exit);
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 0, l_b_only);
tcg_gen_brcondi_tl(TCG_COND_EQ, t2, 4, l_c_only);
tcg_gen_shli_tl(t2, t2, 3);
tcg_gen_subfi_tl(t3, 32, t2);
tcg_gen_shl_tl(t0, t0, t2);
tcg_gen_shr_tl(t1, t1, t3);
tcg_gen_or_tl(mxu_gpr[XRa - 1], t0, t1);
tcg_gen_br(l_exit);
gen_set_label(l_b_only);
gen_store_mxu_gpr(t0, XRa);
tcg_gen_br(l_exit);
gen_set_label(l_c_only);
gen_store_mxu_gpr(t1, XRa);
gen_set_label(l_exit);
}
}
/*
* S32MADD XRa, XRd, rb, rc
* 32 to 64 bit signed multiply with subsequent add
* result stored in {XRa, XRd} pair, stain HI/LO.
* S32MADDU XRa, XRd, rb, rc
* 32 to 64 bit unsigned multiply with subsequent add
* result stored in {XRa, XRd} pair, stain HI/LO.
* S32MSUB XRa, XRd, rb, rc
* 32 to 64 bit signed multiply with subsequent subtract
* result stored in {XRa, XRd} pair, stain HI/LO.
* S32MSUBU XRa, XRd, rb, rc
* 32 to 64 bit unsigned multiply with subsequent subtract
* result stored in {XRa, XRd} pair, stain HI/LO.
*/
static void gen_mxu_s32madd_sub(DisasContext *ctx, bool sub, bool uns)
{
uint32_t XRa, XRd, Rb, Rc;
XRa = extract32(ctx->opcode, 6, 4);
XRd = extract32(ctx->opcode, 10, 4);
Rb = extract32(ctx->opcode, 16, 5);
Rc = extract32(ctx->opcode, 21, 5);
if (unlikely(Rb == 0 || Rc == 0)) {
/* do nothing because x + 0 * y => x */
} else if (unlikely(XRa == 0 && XRd == 0)) {
/* do nothing because result just dropped */
} else {
TCGv t0 = tcg_temp_new();
TCGv t1 = tcg_temp_new();
TCGv_i64 t2 = tcg_temp_new_i64();
TCGv_i64 t3 = tcg_temp_new_i64();
gen_load_gpr(t0, Rb);
gen_load_gpr(t1, Rc);
if (uns) {
tcg_gen_extu_tl_i64(t2, t0);
tcg_gen_extu_tl_i64(t3, t1);
} else {
tcg_gen_ext_tl_i64(t2, t0);
tcg_gen_ext_tl_i64(t3, t1);
}
tcg_gen_mul_i64(t2, t2, t3);
gen_load_mxu_gpr(t0, XRa);
gen_load_mxu_gpr(t1, XRd);
tcg_gen_concat_tl_i64(t3, t1, t0);
if (sub) {
tcg_gen_sub_i64(t3, t3, t2);
} else {
tcg_gen_add_i64(t3, t3, t2);
}
gen_move_low32(t1, t3);
gen_move_high32(t0, t3);
tcg_gen_mov_tl(cpu_HI[0], t0);
tcg_gen_mov_tl(cpu_LO[0], t1);
gen_store_mxu_gpr(t1, XRd);
gen_store_mxu_gpr(t0, XRa);
}
}
/*
* Decoding engine for MXU
* =======================
*/
static void decode_opc_mxu__pool00(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_S32MAX:
case OPC_MXU_S32MIN:
gen_mxu_S32MAX_S32MIN(ctx);
break;
case OPC_MXU_D16MAX:
case OPC_MXU_D16MIN:
gen_mxu_D16MAX_D16MIN(ctx);
break;
case OPC_MXU_Q8MAX:
case OPC_MXU_Q8MIN:
gen_mxu_Q8MAX_Q8MIN(ctx);
break;
case OPC_MXU_Q8SLT:
gen_mxu_q8slt(ctx, false);
break;
case OPC_MXU_Q8SLTU:
gen_mxu_q8slt(ctx, true);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static bool decode_opc_mxu_s32madd_sub(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 0, 6);
uint32_t pad = extract32(ctx->opcode, 14, 2);
if (pad != 2) {
/* MIPS32R1 MADD/MADDU/MSUB/MSUBU are on pad == 0 */
return false;
}
switch (opcode) {
case OPC_MXU_S32MADD:
gen_mxu_s32madd_sub(ctx, false, false);
break;
case OPC_MXU_S32MADDU:
gen_mxu_s32madd_sub(ctx, false, true);
break;
case OPC_MXU_S32MSUB:
gen_mxu_s32madd_sub(ctx, true, false);
break;
case OPC_MXU_S32MSUBU:
gen_mxu_s32madd_sub(ctx, true, true);
break;
default:
return false;
}
return true;
}
static void decode_opc_mxu__pool01(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_S32SLT:
gen_mxu_S32SLT(ctx);
break;
case OPC_MXU_D16SLT:
gen_mxu_D16SLT(ctx);
break;
case OPC_MXU_D16AVG:
gen_mxu_d16avg(ctx, false);
break;
case OPC_MXU_D16AVGR:
gen_mxu_d16avg(ctx, true);
break;
case OPC_MXU_Q8AVG:
gen_mxu_q8avg(ctx, false);
break;
case OPC_MXU_Q8AVGR:
gen_mxu_q8avg(ctx, true);
break;
case OPC_MXU_Q8ADD:
gen_mxu_Q8ADD(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool02(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_S32CPS:
gen_mxu_S32CPS(ctx);
break;
case OPC_MXU_D16CPS:
gen_mxu_D16CPS(ctx);
break;
case OPC_MXU_Q8ABD:
gen_mxu_Q8ABD(ctx);
break;
case OPC_MXU_Q16SAT:
gen_mxu_Q16SAT(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool03(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 24, 2);
switch (opcode) {
case OPC_MXU_D16MULF:
gen_mxu_d16mul(ctx, true, true);
break;
case OPC_MXU_D16MULE:
gen_mxu_d16mul(ctx, true, false);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
uint32_t reversed = extract32(ctx->opcode, 20, 1);
uint32_t opcode = extract32(ctx->opcode, 10, 4);
/* Don't care about opcode bits as their meaning is unknown yet */
switch (opcode) {
default:
gen_mxu_s32ldxx(ctx, reversed, false);
break;
}
}
static void decode_opc_mxu__pool05(DisasContext *ctx)
{
uint32_t reversed = extract32(ctx->opcode, 20, 1);
uint32_t opcode = extract32(ctx->opcode, 10, 4);
/* Don't care about opcode bits as their meaning is unknown yet */
switch (opcode) {
default:
gen_mxu_s32stxx(ctx, reversed, false);
break;
}
}
static void decode_opc_mxu__pool06(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 10, 4);
uint32_t strd2 = extract32(ctx->opcode, 14, 2);
switch (opcode) {
case OPC_MXU_S32LDST:
case OPC_MXU_S32LDSTR:
if (strd2 <= 2) {
gen_mxu_s32ldxvx(ctx, opcode, false, strd2);
break;
}
/* fallthrough */
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool07(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 10, 4);
uint32_t strd2 = extract32(ctx->opcode, 14, 2);
switch (opcode) {
case OPC_MXU_S32LDST:
case OPC_MXU_S32LDSTR:
if (strd2 <= 2) {
gen_mxu_s32stxvx(ctx, opcode, false, strd2);
break;
}
/* fallthrough */
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool08(DisasContext *ctx)
{
uint32_t reversed = extract32(ctx->opcode, 20, 1);
uint32_t opcode = extract32(ctx->opcode, 10, 4);
/* Don't care about opcode bits as their meaning is unknown yet */
switch (opcode) {
default:
gen_mxu_s32ldxx(ctx, reversed, true);
break;
}
}
static void decode_opc_mxu__pool09(DisasContext *ctx)
{
uint32_t reversed = extract32(ctx->opcode, 20, 1);
uint32_t opcode = extract32(ctx->opcode, 10, 4);
/* Don't care about opcode bits as their meaning is unknown yet */
switch (opcode) {
default:
gen_mxu_s32stxx(ctx, reversed, true);
break;
}
}
static void decode_opc_mxu__pool10(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 10, 4);
uint32_t strd2 = extract32(ctx->opcode, 14, 2);
switch (opcode) {
case OPC_MXU_S32LDST:
case OPC_MXU_S32LDSTR:
if (strd2 <= 2) {
gen_mxu_s32ldxvx(ctx, opcode, true, strd2);
break;
}
/* fallthrough */
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool11(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 10, 4);
uint32_t strd2 = extract32(ctx->opcode, 14, 2);
switch (opcode) {
case OPC_MXU_S32LDST:
case OPC_MXU_S32LDSTR:
if (strd2 <= 2) {
gen_mxu_s32stxvx(ctx, opcode, true, strd2);
break;
}
/* fallthrough */
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool12(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 22, 2);
switch (opcode) {
case OPC_MXU_D32ACC:
gen_mxu_d32acc(ctx);
break;
case OPC_MXU_D32ACCM:
gen_mxu_d32accm(ctx);
break;
case OPC_MXU_D32ASUM:
gen_mxu_d32asum(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool13(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 22, 2);
switch (opcode) {
case OPC_MXU_Q16ACC:
gen_mxu_q16acc(ctx);
break;
case OPC_MXU_Q16ACCM:
gen_mxu_q16accm(ctx);
break;
case OPC_MXU_D16ASUM:
gen_mxu_d16asum(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool14(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 22, 2);
switch (opcode) {
case OPC_MXU_Q8ADDE:
gen_mxu_q8adde(ctx, false);
break;
case OPC_MXU_D8SUM:
gen_mxu_d8sum(ctx, false);
break;
case OPC_MXU_D8SUMC:
gen_mxu_d8sum(ctx, true);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool15(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 14, 2);
switch (opcode) {
case OPC_MXU_S32MUL:
gen_mxu_s32mul(ctx, false);
break;
case OPC_MXU_S32MULU:
gen_mxu_s32mul(ctx, true);
break;
case OPC_MXU_S32EXTR:
gen_mxu_s32extr(ctx);
break;
case OPC_MXU_S32EXTRV:
gen_mxu_s32extrv(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool16(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_D32SARW:
gen_mxu_d32sarl(ctx, true);
break;
case OPC_MXU_S32ALN:
gen_mxu_S32ALN(ctx);
break;
case OPC_MXU_S32ALNI:
gen_mxu_S32ALNI(ctx);
break;
case OPC_MXU_S32LUI:
gen_mxu_s32lui(ctx);
break;
case OPC_MXU_S32NOR:
gen_mxu_S32NOR(ctx);
break;
case OPC_MXU_S32AND:
gen_mxu_S32AND(ctx);
break;
case OPC_MXU_S32OR:
gen_mxu_S32OR(ctx);
break;
case OPC_MXU_S32XOR:
gen_mxu_S32XOR(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool17(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 6, 3);
uint32_t strd2 = extract32(ctx->opcode, 9, 2);
if (strd2 > 2) {
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
return;
}
switch (opcode) {
case OPC_MXU_LXW:
gen_mxu_lxx(ctx, strd2, MO_TE | MO_UL);
break;
case OPC_MXU_LXB:
gen_mxu_lxx(ctx, strd2, MO_TE | MO_SB);
break;
case OPC_MXU_LXH:
gen_mxu_lxx(ctx, strd2, MO_TE | MO_SW);
break;
case OPC_MXU_LXBU:
gen_mxu_lxx(ctx, strd2, MO_TE | MO_UB);
break;
case OPC_MXU_LXHU:
gen_mxu_lxx(ctx, strd2, MO_TE | MO_UW);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool18(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_D32SLLV:
gen_mxu_d32sxxv(ctx, false, false);
break;
case OPC_MXU_D32SLRV:
gen_mxu_d32sxxv(ctx, true, false);
break;
case OPC_MXU_D32SARV:
gen_mxu_d32sxxv(ctx, true, true);
break;
case OPC_MXU_Q16SLLV:
gen_mxu_q16sxxv(ctx, false, false);
break;
case OPC_MXU_Q16SLRV:
gen_mxu_q16sxxv(ctx, true, false);
break;
case OPC_MXU_Q16SARV:
gen_mxu_q16sxxv(ctx, true, true);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool19(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 22, 4);
switch (opcode) {
case OPC_MXU_Q8MUL:
gen_mxu_q8mul_mac(ctx, false, false);
break;
case OPC_MXU_Q8MULSU:
gen_mxu_q8mul_mac(ctx, true, false);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool20(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_Q8MOVZ:
gen_mxu_q8movzn(ctx, TCG_COND_NE);
break;
case OPC_MXU_Q8MOVN:
gen_mxu_q8movzn(ctx, TCG_COND_EQ);
break;
case OPC_MXU_D16MOVZ:
gen_mxu_d16movzn(ctx, TCG_COND_NE);
break;
case OPC_MXU_D16MOVN:
gen_mxu_d16movzn(ctx, TCG_COND_EQ);
break;
case OPC_MXU_S32MOVZ:
gen_mxu_s32movzn(ctx, TCG_COND_NE);
break;
case OPC_MXU_S32MOVN:
gen_mxu_s32movzn(ctx, TCG_COND_EQ);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool21(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 22, 2);
switch (opcode) {
case OPC_MXU_Q8MAC:
gen_mxu_q8mul_mac(ctx, false, true);
break;
case OPC_MXU_Q8MACSU:
gen_mxu_q8mul_mac(ctx, true, true);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
{
uint32_t opcode = extract32(insn, 0, 6);
if (opcode == OPC_MXU_S32M2I) {
gen_mxu_s32m2i(ctx);
return true;
}
if (opcode == OPC_MXU_S32I2M) {
gen_mxu_s32i2m(ctx);
return true;
}
{
TCGv t_mxu_cr = tcg_temp_new();
TCGLabel *l_exit = gen_new_label();
gen_load_mxu_cr(t_mxu_cr);
tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
switch (opcode) {
case OPC_MXU_S32MADD:
case OPC_MXU_S32MADDU:
case OPC_MXU_S32MSUB:
case OPC_MXU_S32MSUBU:
return decode_opc_mxu_s32madd_sub(ctx);
case OPC_MXU__POOL00:
decode_opc_mxu__pool00(ctx);
break;
case OPC_MXU_D16MUL:
gen_mxu_d16mul(ctx, false, false);
break;
case OPC_MXU_D16MAC:
gen_mxu_d16mac(ctx, false, false);
break;
case OPC_MXU_D16MACF:
gen_mxu_d16mac(ctx, true, true);
break;
case OPC_MXU_D16MADL:
gen_mxu_d16madl(ctx);
break;
case OPC_MXU_S16MAD:
gen_mxu_s16mad(ctx);
break;
case OPC_MXU_Q16ADD:
gen_mxu_q16add(ctx);
break;
case OPC_MXU_D16MACE:
gen_mxu_d16mac(ctx, true, false);
break;
case OPC_MXU__POOL01:
decode_opc_mxu__pool01(ctx);
break;
case OPC_MXU__POOL02:
decode_opc_mxu__pool02(ctx);
break;
case OPC_MXU__POOL03:
decode_opc_mxu__pool03(ctx);
break;
case OPC_MXU__POOL04:
decode_opc_mxu__pool04(ctx);
break;
case OPC_MXU__POOL05:
decode_opc_mxu__pool05(ctx);
break;
case OPC_MXU__POOL06:
decode_opc_mxu__pool06(ctx);
break;
case OPC_MXU__POOL07:
decode_opc_mxu__pool07(ctx);
break;
case OPC_MXU__POOL08:
decode_opc_mxu__pool08(ctx);
break;
case OPC_MXU__POOL09:
decode_opc_mxu__pool09(ctx);
break;
case OPC_MXU__POOL10:
decode_opc_mxu__pool10(ctx);
break;
case OPC_MXU__POOL11:
decode_opc_mxu__pool11(ctx);
break;
case OPC_MXU_D32ADD:
gen_mxu_d32add(ctx);
break;
case OPC_MXU__POOL12:
decode_opc_mxu__pool12(ctx);
break;
case OPC_MXU__POOL13:
decode_opc_mxu__pool13(ctx);
break;
case OPC_MXU__POOL14:
decode_opc_mxu__pool14(ctx);
break;
case OPC_MXU_Q8ACCE:
gen_mxu_q8adde(ctx, true);
break;
case OPC_MXU_S8LDD:
gen_mxu_s8ldd(ctx, false);
break;
case OPC_MXU_S8STD:
gen_mxu_s8std(ctx, false);
break;
case OPC_MXU_S8LDI:
gen_mxu_s8ldd(ctx, true);
break;
case OPC_MXU_S8SDI:
gen_mxu_s8std(ctx, true);
break;
case OPC_MXU__POOL15:
decode_opc_mxu__pool15(ctx);
break;
case OPC_MXU__POOL16:
decode_opc_mxu__pool16(ctx);
break;
case OPC_MXU__POOL17:
decode_opc_mxu__pool17(ctx);
break;
case OPC_MXU_S16LDD:
gen_mxu_s16ldd(ctx, false);
break;
case OPC_MXU_S16STD:
gen_mxu_s16std(ctx, false);
break;
case OPC_MXU_S16LDI:
gen_mxu_s16ldd(ctx, true);
break;
case OPC_MXU_S16SDI:
gen_mxu_s16std(ctx, true);
break;
case OPC_MXU_D32SLL:
gen_mxu_d32sxx(ctx, false, false);
break;
case OPC_MXU_D32SLR:
gen_mxu_d32sxx(ctx, true, false);
break;
case OPC_MXU_D32SARL:
gen_mxu_d32sarl(ctx, false);
break;
case OPC_MXU_D32SAR:
gen_mxu_d32sxx(ctx, true, true);
break;
case OPC_MXU_Q16SLL:
gen_mxu_q16sxx(ctx, false, false);
break;
case OPC_MXU__POOL18:
decode_opc_mxu__pool18(ctx);
break;
case OPC_MXU_Q16SLR:
gen_mxu_q16sxx(ctx, true, false);
break;
case OPC_MXU_Q16SAR:
gen_mxu_q16sxx(ctx, true, true);
break;
case OPC_MXU__POOL19:
decode_opc_mxu__pool19(ctx);
break;
case OPC_MXU__POOL20:
decode_opc_mxu__pool20(ctx);
break;
case OPC_MXU__POOL21:
decode_opc_mxu__pool21(ctx);
break;
case OPC_MXU_Q16SCOP:
gen_mxu_q16scop(ctx);
break;
case OPC_MXU_Q8MADL:
gen_mxu_q8madl(ctx);
break;
case OPC_MXU_S32SFL:
gen_mxu_s32sfl(ctx);
break;
case OPC_MXU_Q8SAD:
gen_mxu_q8sad(ctx);
break;
default:
return false;
}
gen_set_label(l_exit);
}
return true;
}