qemu/target/mips/mxu_translate.c

1610 lines
55 KiB
C
Raw Normal View History

/*
* Ingenic XBurst Media eXtension Unit (MXU) translation routines.
*
* Copyright (c) 2004-2005 Jocelyn Mayer
* Copyright (c) 2006 Marius Groeger (FPU operations)
* Copyright (c) 2006 Thiemo Seufer (MIPS32R2 support)
* Copyright (c) 2009 CodeSourcery (MIPS16 and microMIPS support)
* Copyright (c) 2012 Jia Liu & Dongxue Zhang (MIPS ASE DSP support)
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
* Datasheet:
*
* "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
* Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
*/
#include "qemu/osdep.h"
#include "tcg/tcg-op.h"
#include "exec/helper-gen.h"
#include "translate.h"
/*
*
* AN OVERVIEW OF MXU EXTENSION INSTRUCTION SET
* ============================================
*
*
* MXU (full name: MIPS eXtension/enhanced Unit) is a SIMD extension of MIPS32
* instructions set. It is designed to fit the needs of signal, graphical and
* video processing applications. MXU instruction set is used in Xburst family
* of microprocessors by Ingenic.
*
* MXU unit contains 17 registers called X0-X16. X0 is always zero, and X16 is
* the control register.
*
*
* The notation used in MXU assembler mnemonics
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* Register operands:
*
* XRa, XRb, XRc, XRd - MXU registers
* Rb, Rc, Rd, Rs, Rt - general purpose MIPS registers
*
* Non-register operands:
*
* aptn1 - 1-bit accumulate add/subtract pattern
* aptn2 - 2-bit accumulate add/subtract pattern
* eptn2 - 2-bit execute add/subtract pattern
* optn2 - 2-bit operand pattern
* optn3 - 3-bit operand pattern
* sft4 - 4-bit shift amount
* strd2 - 2-bit stride amount
*
* Prefixes:
*
* Level of parallelism: Operand size:
* S - single operation at a time 32 - word
* D - two operations in parallel 16 - half word
* Q - four operations in parallel 8 - byte
*
* Operations:
*
* ADD - Add or subtract
* ADDC - Add with carry-in
* ACC - Accumulate
* ASUM - Sum together then accumulate (add or subtract)
* ASUMC - Sum together then accumulate (add or subtract) with carry-in
* AVG - Average between 2 operands
* ABD - Absolute difference
* ALN - Align data
* AND - Logical bitwise 'and' operation
* CPS - Copy sign
* EXTR - Extract bits
* I2M - Move from GPR register to MXU register
* LDD - Load data from memory to XRF
* LDI - Load data from memory to XRF (and increase the address base)
* LUI - Load unsigned immediate
* MUL - Multiply
* MULU - Unsigned multiply
* MADD - 64-bit operand add 32x32 product
* MSUB - 64-bit operand subtract 32x32 product
* MAC - Multiply and accumulate (add or subtract)
* MAD - Multiply and add or subtract
* MAX - Maximum between 2 operands
* MIN - Minimum between 2 operands
* M2I - Move from MXU register to GPR register
* MOVZ - Move if zero
* MOVN - Move if non-zero
* NOR - Logical bitwise 'nor' operation
* OR - Logical bitwise 'or' operation
* STD - Store data from XRF to memory
* SDI - Store data from XRF to memory (and increase the address base)
* SLT - Set of less than comparison
* SAD - Sum of absolute differences
* SLL - Logical shift left
* SLR - Logical shift right
* SAR - Arithmetic shift right
* SAT - Saturation
* SFL - Shuffle
* SCOP - Calculate xs scope (-1, means x<0; 0, means x==0; 1, means x>0)
* XOR - Logical bitwise 'exclusive or' operation
*
* Suffixes:
*
* E - Expand results
* F - Fixed point multiplication
* L - Low part result
* R - Doing rounding
* V - Variable instead of immediate
* W - Combine above L and V
*
*
* The list of MXU instructions grouped by functionality
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* Load/Store instructions Multiplication instructions
* ----------------------- ---------------------------
*
* S32LDD XRa, Rb, s12 S32MADD XRa, XRd, Rs, Rt
* S32STD XRa, Rb, s12 S32MADDU XRa, XRd, Rs, Rt
* S32LDDV XRa, Rb, rc, strd2 S32MSUB XRa, XRd, Rs, Rt
* S32STDV XRa, Rb, rc, strd2 S32MSUBU XRa, XRd, Rs, Rt
* S32LDI XRa, Rb, s12 S32MUL XRa, XRd, Rs, Rt
* S32SDI XRa, Rb, s12 S32MULU XRa, XRd, Rs, Rt
* S32LDIV XRa, Rb, rc, strd2 D16MUL XRa, XRb, XRc, XRd, optn2
* S32SDIV XRa, Rb, rc, strd2 D16MULE XRa, XRb, XRc, optn2
* S32LDDR XRa, Rb, s12 D16MULF XRa, XRb, XRc, optn2
* S32STDR XRa, Rb, s12 D16MAC XRa, XRb, XRc, XRd, aptn2, optn2
* S32LDDVR XRa, Rb, rc, strd2 D16MACE XRa, XRb, XRc, XRd, aptn2, optn2
* S32STDVR XRa, Rb, rc, strd2 D16MACF XRa, XRb, XRc, XRd, aptn2, optn2
* S32LDIR XRa, Rb, s12 D16MADL XRa, XRb, XRc, XRd, aptn2, optn2
* S32SDIR XRa, Rb, s12 S16MAD XRa, XRb, XRc, XRd, aptn1, optn2
* S32LDIVR XRa, Rb, rc, strd2 Q8MUL XRa, XRb, XRc, XRd
* S32SDIVR XRa, Rb, rc, strd2 Q8MULSU XRa, XRb, XRc, XRd
* S16LDD XRa, Rb, s10, eptn2 Q8MAC XRa, XRb, XRc, XRd, aptn2
* S16STD XRa, Rb, s10, eptn2 Q8MACSU XRa, XRb, XRc, XRd, aptn2
* S16LDI XRa, Rb, s10, eptn2 Q8MADL XRa, XRb, XRc, XRd, aptn2
* S16SDI XRa, Rb, s10, eptn2
* S8LDD XRa, Rb, s8, eptn3
* S8STD XRa, Rb, s8, eptn3 Addition and subtraction instructions
* S8LDI XRa, Rb, s8, eptn3 -------------------------------------
* S8SDI XRa, Rb, s8, eptn3
* LXW Rd, Rs, Rt, strd2 D32ADD XRa, XRb, XRc, XRd, eptn2
* LXH Rd, Rs, Rt, strd2 D32ADDC XRa, XRb, XRc, XRd
* LXHU Rd, Rs, Rt, strd2 D32ACC XRa, XRb, XRc, XRd, eptn2
* LXB Rd, Rs, Rt, strd2 D32ACCM XRa, XRb, XRc, XRd, eptn2
* LXBU Rd, Rs, Rt, strd2 D32ASUM XRa, XRb, XRc, XRd, eptn2
* S32CPS XRa, XRb, XRc
* Q16ADD XRa, XRb, XRc, XRd, eptn2, optn2
* Comparison instructions Q16ACC XRa, XRb, XRc, XRd, eptn2
* ----------------------- Q16ACCM XRa, XRb, XRc, XRd, eptn2
* D16ASUM XRa, XRb, XRc, XRd, eptn2
* S32MAX XRa, XRb, XRc D16CPS XRa, XRb,
* S32MIN XRa, XRb, XRc D16AVG XRa, XRb, XRc
* S32SLT XRa, XRb, XRc D16AVGR XRa, XRb, XRc
* S32MOVZ XRa, XRb, XRc Q8ADD XRa, XRb, XRc, eptn2
* S32MOVN XRa, XRb, XRc Q8ADDE XRa, XRb, XRc, XRd, eptn2
* D16MAX XRa, XRb, XRc Q8ACCE XRa, XRb, XRc, XRd, eptn2
* D16MIN XRa, XRb, XRc Q8ABD XRa, XRb, XRc
* D16SLT XRa, XRb, XRc Q8SAD XRa, XRb, XRc, XRd
* D16MOVZ XRa, XRb, XRc Q8AVG XRa, XRb, XRc
* D16MOVN XRa, XRb, XRc Q8AVGR XRa, XRb, XRc
* Q8MAX XRa, XRb, XRc D8SUM XRa, XRb, XRc, XRd
* Q8MIN XRa, XRb, XRc D8SUMC XRa, XRb, XRc, XRd
* Q8SLT XRa, XRb, XRc
* Q8SLTU XRa, XRb, XRc
* Q8MOVZ XRa, XRb, XRc Shift instructions
* Q8MOVN XRa, XRb, XRc ------------------
*
* D32SLL XRa, XRb, XRc, XRd, sft4
* Bitwise instructions D32SLR XRa, XRb, XRc, XRd, sft4
* -------------------- D32SAR XRa, XRb, XRc, XRd, sft4
* D32SARL XRa, XRb, XRc, sft4
* S32NOR XRa, XRb, XRc D32SLLV XRa, XRb, Rb
* S32AND XRa, XRb, XRc D32SLRV XRa, XRb, Rb
* S32XOR XRa, XRb, XRc D32SARV XRa, XRb, Rb
* S32OR XRa, XRb, XRc D32SARW XRa, XRb, XRc, Rb
* Q16SLL XRa, XRb, XRc, XRd, sft4
* Q16SLR XRa, XRb, XRc, XRd, sft4
* Miscellaneous instructions Q16SAR XRa, XRb, XRc, XRd, sft4
* ------------------------- Q16SLLV XRa, XRb, Rb
* Q16SLRV XRa, XRb, Rb
* S32SFL XRa, XRb, XRc, XRd, optn2 Q16SARV XRa, XRb, Rb
* S32ALN XRa, XRb, XRc, Rb
* S32ALNI XRa, XRb, XRc, s3
* S32LUI XRa, s8, optn3 Move instructions
* S32EXTR XRa, XRb, Rb, bits5 -----------------
* S32EXTRV XRa, XRb, Rs, Rt
* Q16SCOP XRa, XRb, XRc, XRd S32M2I XRa, Rb
* Q16SAT XRa, XRb, XRc S32I2M XRa, Rb
*
*
* The opcode organization of MXU instructions
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* The bits 31..26 of all MXU instructions are equal to 0x1C (also referred
* as opcode SPECIAL2 in the base MIPS ISA). The organization and meaning of
* other bits up to the instruction level is as follows:
*
* bits
* 05..00
*
* 000000 OPC_MXU_S32MADD
* 000001 OPC_MXU_S32MADDU
* 000010 <not assigned> (non-MXU OPC_MUL)
*
* 20..18
* 000011 OPC_MXU__POOL00 000 OPC_MXU_S32MAX
* 001 OPC_MXU_S32MIN
* 010 OPC_MXU_D16MAX
* 011 OPC_MXU_D16MIN
* 100 OPC_MXU_Q8MAX
* 101 OPC_MXU_Q8MIN
* 110 OPC_MXU_Q8SLT
* 111 OPC_MXU_Q8SLTU
* 000100 OPC_MXU_S32MSUB
* 000101 OPC_MXU_S32MSUBU 20..18
* 000110 OPC_MXU__POOL01 000 OPC_MXU_S32SLT
* 001 OPC_MXU_D16SLT
* 010 OPC_MXU_D16AVG
* 011 OPC_MXU_D16AVGR
* 100 OPC_MXU_Q8AVG
* 101 OPC_MXU_Q8AVGR
* 111 OPC_MXU_Q8ADD
*
* 20..18
* 000111 OPC_MXU__POOL02 000 OPC_MXU_S32CPS
* 010 OPC_MXU_D16CPS
* 100 OPC_MXU_Q8ABD
* 110 OPC_MXU_Q16SAT
* 001000 OPC_MXU_D16MUL
* 25..24
* 001001 OPC_MXU__POOL03 00 OPC_MXU_D16MULF
* 01 OPC_MXU_D16MULE
* 001010 OPC_MXU_D16MAC
* 001011 OPC_MXU_D16MACF
* 001100 OPC_MXU_D16MADL
* 001101 OPC_MXU_S16MAD
* 001110 OPC_MXU_Q16ADD
* 001111 OPC_MXU_D16MACE 23
* 0 OPC_MXU_S32LDD
* 010000 OPC_MXU__POOL04 1 OPC_MXU_S32LDDR
*
* 23
* 010001 OPC_MXU__POOL05 0 OPC_MXU_S32STD
* 1 OPC_MXU_S32STDR
*
* 13..10
* 010010 OPC_MXU__POOL06 0000 OPC_MXU_S32LDDV
* 0001 OPC_MXU_S32LDDVR
*
* 13..10
* 010011 OPC_MXU__POOL07 0000 OPC_MXU_S32STDV
* 0001 OPC_MXU_S32STDVR
*
* 23
* 010100 OPC_MXU__POOL08 0 OPC_MXU_S32LDI
* 1 OPC_MXU_S32LDIR
*
* 23
* 010101 OPC_MXU__POOL09 0 OPC_MXU_S32SDI
* 1 OPC_MXU_S32SDIR
*
* 13..10
* 010110 OPC_MXU__POOL10 0000 OPC_MXU_S32LDIV
* 0001 OPC_MXU_S32LDIVR
*
* 13..10
* 010111 OPC_MXU__POOL11 0000 OPC_MXU_S32SDIV
* 0001 OPC_MXU_S32SDIVR
* 011000 OPC_MXU_D32ADD
* 23..22
* MXU 011001 OPC_MXU__POOL12 00 OPC_MXU_D32ACC
* opcodes 01 OPC_MXU_D32ACCM
* 10 OPC_MXU_D32ASUM
* 011010 <not assigned>
* 23..22
* 011011 OPC_MXU__POOL13 00 OPC_MXU_Q16ACC
* 01 OPC_MXU_Q16ACCM
* 10 OPC_MXU_Q16ASUM
*
* 23..22
* 011100 OPC_MXU__POOL14 00 OPC_MXU_Q8ADDE
* 01 OPC_MXU_D8SUM
* 011101 OPC_MXU_Q8ACCE 10 OPC_MXU_D8SUMC
* 011110 <not assigned>
* 011111 <not assigned>
* 100000 <not assigned> (overlaps with CLZ)
* 100001 <not assigned> (overlaps with CLO)
* 100010 OPC_MXU_S8LDD
* 100011 OPC_MXU_S8STD 15..14
* 100100 OPC_MXU_S8LDI 00 OPC_MXU_S32MUL
* 100101 OPC_MXU_S8SDI 00 OPC_MXU_S32MULU
* 00 OPC_MXU_S32EXTR
* 100110 OPC_MXU__POOL15 00 OPC_MXU_S32EXTRV
*
* 20..18
* 100111 OPC_MXU__POOL16 000 OPC_MXU_D32SARW
* 001 OPC_MXU_S32ALN
* 010 OPC_MXU_S32ALNI
* 011 OPC_MXU_S32LUI
* 100 OPC_MXU_S32NOR
* 101 OPC_MXU_S32AND
* 110 OPC_MXU_S32OR
* 111 OPC_MXU_S32XOR
*
* 7..5
* 101000 OPC_MXU__POOL17 000 OPC_MXU_LXB
* 001 OPC_MXU_LXH
* 101001 <not assigned> 011 OPC_MXU_LXW
* 101010 OPC_MXU_S16LDD 100 OPC_MXU_LXBU
* 101011 OPC_MXU_S16STD 101 OPC_MXU_LXHU
* 101100 OPC_MXU_S16LDI
* 101101 OPC_MXU_S16SDI
* 101110 OPC_MXU_S32M2I
* 101111 OPC_MXU_S32I2M
* 110000 OPC_MXU_D32SLL
* 110001 OPC_MXU_D32SLR 20..18
* 110010 OPC_MXU_D32SARL 000 OPC_MXU_D32SLLV
* 110011 OPC_MXU_D32SAR 001 OPC_MXU_D32SLRV
* 110100 OPC_MXU_Q16SLL 010 OPC_MXU_D32SARV
* 110101 OPC_MXU_Q16SLR 011 OPC_MXU_Q16SLLV
* 100 OPC_MXU_Q16SLRV
* 110110 OPC_MXU__POOL18 101 OPC_MXU_Q16SARV
*
* 110111 OPC_MXU_Q16SAR
* 23..22
* 111000 OPC_MXU__POOL19 00 OPC_MXU_Q8MUL
* 01 OPC_MXU_Q8MULSU
*
* 20..18
* 111001 OPC_MXU__POOL20 000 OPC_MXU_Q8MOVZ
* 001 OPC_MXU_Q8MOVN
* 010 OPC_MXU_D16MOVZ
* 011 OPC_MXU_D16MOVN
* 100 OPC_MXU_S32MOVZ
* 101 OPC_MXU_S32MOVN
*
* 23..22
* 111010 OPC_MXU__POOL21 00 OPC_MXU_Q8MAC
* 10 OPC_MXU_Q8MACSU
* 111011 OPC_MXU_Q16SCOP
* 111100 OPC_MXU_Q8MADL
* 111101 OPC_MXU_S32SFL
* 111110 OPC_MXU_Q8SAD
* 111111 <not assigned> (overlaps with SDBBP)
*
*
* Compiled after:
*
* "XBurst® Instruction Set Architecture MIPS eXtension/enhanced Unit
* Programming Manual", Ingenic Semiconductor Co, Ltd., revision June 2, 2017
*/
enum {
OPC_MXU__POOL00 = 0x03,
OPC_MXU_D16MUL = 0x08,
OPC_MXU_D16MAC = 0x0A,
OPC_MXU__POOL04 = 0x10,
OPC_MXU_S8LDD = 0x22,
OPC_MXU__POOL16 = 0x27,
OPC_MXU_S32M2I = 0x2E,
OPC_MXU_S32I2M = 0x2F,
OPC_MXU__POOL19 = 0x38,
};
/*
* MXU pool 00
*/
enum {
OPC_MXU_S32MAX = 0x00,
OPC_MXU_S32MIN = 0x01,
OPC_MXU_D16MAX = 0x02,
OPC_MXU_D16MIN = 0x03,
OPC_MXU_Q8MAX = 0x04,
OPC_MXU_Q8MIN = 0x05,
};
/*
* MXU pool 04
*/
enum {
OPC_MXU_S32LDD = 0x00,
OPC_MXU_S32LDDR = 0x01,
};
/*
* MXU pool 16
*/
enum {
OPC_MXU_S32ALNI = 0x02,
OPC_MXU_S32NOR = 0x04,
OPC_MXU_S32AND = 0x05,
OPC_MXU_S32OR = 0x06,
OPC_MXU_S32XOR = 0x07,
};
/*
* MXU pool 19
*/
enum {
OPC_MXU_Q8MUL = 0x00,
OPC_MXU_Q8MULSU = 0x01,
};
/* MXU accumulate add/subtract 1-bit pattern 'aptn1' */
#define MXU_APTN1_A 0
#define MXU_APTN1_S 1
/* MXU accumulate add/subtract 2-bit pattern 'aptn2' */
#define MXU_APTN2_AA 0
#define MXU_APTN2_AS 1
#define MXU_APTN2_SA 2
#define MXU_APTN2_SS 3
/* MXU execute add/subtract 2-bit pattern 'eptn2' */
#define MXU_EPTN2_AA 0
#define MXU_EPTN2_AS 1
#define MXU_EPTN2_SA 2
#define MXU_EPTN2_SS 3
/* MXU operand getting pattern 'optn2' */
#define MXU_OPTN2_PTN0 0
#define MXU_OPTN2_PTN1 1
#define MXU_OPTN2_PTN2 2
#define MXU_OPTN2_PTN3 3
/* alternative naming scheme for 'optn2' */
#define MXU_OPTN2_WW 0
#define MXU_OPTN2_LW 1
#define MXU_OPTN2_HW 2
#define MXU_OPTN2_XW 3
/* MXU operand getting pattern 'optn3' */
#define MXU_OPTN3_PTN0 0
#define MXU_OPTN3_PTN1 1
#define MXU_OPTN3_PTN2 2
#define MXU_OPTN3_PTN3 3
#define MXU_OPTN3_PTN4 4
#define MXU_OPTN3_PTN5 5
#define MXU_OPTN3_PTN6 6
#define MXU_OPTN3_PTN7 7
/* MXU registers */
static TCGv mxu_gpr[NUMBER_OF_MXU_REGISTERS - 1];
static TCGv mxu_CR;
static const char * const mxuregnames[] = {
"XR1", "XR2", "XR3", "XR4", "XR5", "XR6", "XR7", "XR8",
"XR9", "XR10", "XR11", "XR12", "XR13", "XR14", "XR15", "MXU_CR",
};
void mxu_translate_init(void)
{
for (unsigned i = 0; i < NUMBER_OF_MXU_REGISTERS - 1; i++) {
mxu_gpr[i] = tcg_global_mem_new(cpu_env,
offsetof(CPUMIPSState, active_tc.mxu_gpr[i]),
mxuregnames[i]);
}
mxu_CR = tcg_global_mem_new(cpu_env,
offsetof(CPUMIPSState, active_tc.mxu_cr),
mxuregnames[NUMBER_OF_MXU_REGISTERS - 1]);
}
/* MXU General purpose registers moves. */
static inline void gen_load_mxu_gpr(TCGv t, unsigned int reg)
{
if (reg == 0) {
tcg_gen_movi_tl(t, 0);
} else if (reg <= 15) {
tcg_gen_mov_tl(t, mxu_gpr[reg - 1]);
}
}
static inline void gen_store_mxu_gpr(TCGv t, unsigned int reg)
{
if (reg > 0 && reg <= 15) {
tcg_gen_mov_tl(mxu_gpr[reg - 1], t);
}
}
/* MXU control register moves. */
static inline void gen_load_mxu_cr(TCGv t)
{
tcg_gen_mov_tl(t, mxu_CR);
}
static inline void gen_store_mxu_cr(TCGv t)
{
/* TODO: Add handling of RW rules for MXU_CR. */
tcg_gen_mov_tl(mxu_CR, t);
}
/*
* S32I2M XRa, rb - Register move from GRF to XRF
*/
static void gen_mxu_s32i2m(DisasContext *ctx)
{
TCGv t0;
uint32_t XRa, Rb;
t0 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 5);
Rb = extract32(ctx->opcode, 16, 5);
gen_load_gpr(t0, Rb);
if (XRa <= 15) {
gen_store_mxu_gpr(t0, XRa);
} else if (XRa == 16) {
gen_store_mxu_cr(t0);
}
tcg_temp_free(t0);
}
/*
* S32M2I XRa, rb - Register move from XRF to GRF
*/
static void gen_mxu_s32m2i(DisasContext *ctx)
{
TCGv t0;
uint32_t XRa, Rb;
t0 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 5);
Rb = extract32(ctx->opcode, 16, 5);
if (XRa <= 15) {
gen_load_mxu_gpr(t0, XRa);
} else if (XRa == 16) {
gen_load_mxu_cr(t0);
}
gen_store_gpr(t0, Rb);
tcg_temp_free(t0);
}
/*
* S8LDD XRa, Rb, s8, optn3 - Load a byte from memory to XRF
*/
static void gen_mxu_s8ldd(DisasContext *ctx)
{
TCGv t0, t1;
uint32_t XRa, Rb, s8, optn3;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s8 = extract32(ctx->opcode, 10, 8);
optn3 = extract32(ctx->opcode, 18, 3);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
tcg_gen_addi_tl(t0, t0, (int8_t)s8);
switch (optn3) {
/* XRa[7:0] = tmp8 */
case MXU_OPTN3_PTN0:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 0, 8);
break;
/* XRa[15:8] = tmp8 */
case MXU_OPTN3_PTN1:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 8, 8);
break;
/* XRa[23:16] = tmp8 */
case MXU_OPTN3_PTN2:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 16, 8);
break;
/* XRa[31:24] = tmp8 */
case MXU_OPTN3_PTN3:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
gen_load_mxu_gpr(t0, XRa);
tcg_gen_deposit_tl(t0, t0, t1, 24, 8);
break;
/* XRa = {8'b0, tmp8, 8'b0, tmp8} */
case MXU_OPTN3_PTN4:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
break;
/* XRa = {tmp8, 8'b0, tmp8, 8'b0} */
case MXU_OPTN3_PTN5:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
tcg_gen_shli_tl(t1, t1, 8);
tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
break;
/* XRa = {{8{sign of tmp8}}, tmp8, {8{sign of tmp8}}, tmp8} */
case MXU_OPTN3_PTN6:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_SB);
tcg_gen_mov_tl(t0, t1);
tcg_gen_andi_tl(t0, t0, 0xFF00FFFF);
tcg_gen_shli_tl(t1, t1, 16);
tcg_gen_or_tl(t0, t0, t1);
break;
/* XRa = {tmp8, tmp8, tmp8, tmp8} */
case MXU_OPTN3_PTN7:
tcg_gen_qemu_ld_tl(t1, t0, ctx->mem_idx, MO_UB);
tcg_gen_deposit_tl(t1, t1, t1, 8, 8);
tcg_gen_deposit_tl(t0, t1, t1, 16, 16);
break;
}
gen_store_mxu_gpr(t0, XRa);
tcg_temp_free(t0);
tcg_temp_free(t1);
}
/*
* D16MUL XRa, XRb, XRc, XRd, optn2 - Signed 16 bit pattern multiplication
*/
static void gen_mxu_d16mul(DisasContext *ctx)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
gen_load_mxu_gpr(t1, XRb);
tcg_gen_sextract_tl(t0, t1, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
gen_load_mxu_gpr(t3, XRc);
tcg_gen_sextract_tl(t2, t3, 0, 16);
tcg_gen_sextract_tl(t3, t3, 16, 16);
switch (optn2) {
case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
}
gen_store_mxu_gpr(t3, XRa);
gen_store_mxu_gpr(t2, XRd);
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
tcg_temp_free(t3);
}
/*
* D16MAC XRa, XRb, XRc, XRd, aptn2, optn2 - Signed 16 bit pattern multiply
* and accumulate
*/
static void gen_mxu_d16mac(DisasContext *ctx)
{
TCGv t0, t1, t2, t3;
uint32_t XRa, XRb, XRc, XRd, optn2, aptn2;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
optn2 = extract32(ctx->opcode, 22, 2);
aptn2 = extract32(ctx->opcode, 24, 2);
gen_load_mxu_gpr(t1, XRb);
tcg_gen_sextract_tl(t0, t1, 0, 16);
tcg_gen_sextract_tl(t1, t1, 16, 16);
gen_load_mxu_gpr(t3, XRc);
tcg_gen_sextract_tl(t2, t3, 0, 16);
tcg_gen_sextract_tl(t3, t3, 16, 16);
switch (optn2) {
case MXU_OPTN2_WW: /* XRB.H*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_LW: /* XRB.L*XRC.H == lop, XRB.L*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t0, t2);
break;
case MXU_OPTN2_HW: /* XRB.H*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t1, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
case MXU_OPTN2_XW: /* XRB.L*XRC.H == lop, XRB.H*XRC.L == rop */
tcg_gen_mul_tl(t3, t0, t3);
tcg_gen_mul_tl(t2, t1, t2);
break;
}
gen_load_mxu_gpr(t0, XRa);
gen_load_mxu_gpr(t1, XRd);
switch (aptn2) {
case MXU_APTN2_AA:
tcg_gen_add_tl(t3, t0, t3);
tcg_gen_add_tl(t2, t1, t2);
break;
case MXU_APTN2_AS:
tcg_gen_add_tl(t3, t0, t3);
tcg_gen_sub_tl(t2, t1, t2);
break;
case MXU_APTN2_SA:
tcg_gen_sub_tl(t3, t0, t3);
tcg_gen_add_tl(t2, t1, t2);
break;
case MXU_APTN2_SS:
tcg_gen_sub_tl(t3, t0, t3);
tcg_gen_sub_tl(t2, t1, t2);
break;
}
gen_store_mxu_gpr(t3, XRa);
gen_store_mxu_gpr(t2, XRd);
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
tcg_temp_free(t3);
}
/*
* Q8MUL XRa, XRb, XRc, XRd - Parallel unsigned 8 bit pattern multiply
* Q8MULSU XRa, XRb, XRc, XRd - Parallel signed 8 bit pattern multiply
*/
static void gen_mxu_q8mul_q8mulsu(DisasContext *ctx)
{
TCGv t0, t1, t2, t3, t4, t5, t6, t7;
uint32_t XRa, XRb, XRc, XRd, sel;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
t2 = tcg_temp_new();
t3 = tcg_temp_new();
t4 = tcg_temp_new();
t5 = tcg_temp_new();
t6 = tcg_temp_new();
t7 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRc = extract32(ctx->opcode, 14, 4);
XRd = extract32(ctx->opcode, 18, 4);
sel = extract32(ctx->opcode, 22, 2);
gen_load_mxu_gpr(t3, XRb);
gen_load_mxu_gpr(t7, XRc);
if (sel == 0x2) {
/* Q8MULSU */
tcg_gen_ext8s_tl(t0, t3);
tcg_gen_shri_tl(t3, t3, 8);
tcg_gen_ext8s_tl(t1, t3);
tcg_gen_shri_tl(t3, t3, 8);
tcg_gen_ext8s_tl(t2, t3);
tcg_gen_shri_tl(t3, t3, 8);
tcg_gen_ext8s_tl(t3, t3);
} else {
/* Q8MUL */
tcg_gen_ext8u_tl(t0, t3);
tcg_gen_shri_tl(t3, t3, 8);
tcg_gen_ext8u_tl(t1, t3);
tcg_gen_shri_tl(t3, t3, 8);
tcg_gen_ext8u_tl(t2, t3);
tcg_gen_shri_tl(t3, t3, 8);
tcg_gen_ext8u_tl(t3, t3);
}
tcg_gen_ext8u_tl(t4, t7);
tcg_gen_shri_tl(t7, t7, 8);
tcg_gen_ext8u_tl(t5, t7);
tcg_gen_shri_tl(t7, t7, 8);
tcg_gen_ext8u_tl(t6, t7);
tcg_gen_shri_tl(t7, t7, 8);
tcg_gen_ext8u_tl(t7, t7);
tcg_gen_mul_tl(t0, t0, t4);
tcg_gen_mul_tl(t1, t1, t5);
tcg_gen_mul_tl(t2, t2, t6);
tcg_gen_mul_tl(t3, t3, t7);
tcg_gen_andi_tl(t0, t0, 0xFFFF);
tcg_gen_andi_tl(t1, t1, 0xFFFF);
tcg_gen_andi_tl(t2, t2, 0xFFFF);
tcg_gen_andi_tl(t3, t3, 0xFFFF);
tcg_gen_shli_tl(t1, t1, 16);
tcg_gen_shli_tl(t3, t3, 16);
tcg_gen_or_tl(t0, t0, t1);
tcg_gen_or_tl(t1, t2, t3);
gen_store_mxu_gpr(t0, XRd);
gen_store_mxu_gpr(t1, XRa);
tcg_temp_free(t0);
tcg_temp_free(t1);
tcg_temp_free(t2);
tcg_temp_free(t3);
tcg_temp_free(t4);
tcg_temp_free(t5);
tcg_temp_free(t6);
tcg_temp_free(t7);
}
/*
* S32LDD XRa, Rb, S12 - Load a word from memory to XRF
* S32LDDR XRa, Rb, S12 - Load a word from memory to XRF, reversed byte seq.
*/
static void gen_mxu_s32ldd_s32lddr(DisasContext *ctx)
{
TCGv t0, t1;
uint32_t XRa, Rb, s12, sel;
t0 = tcg_temp_new();
t1 = tcg_temp_new();
XRa = extract32(ctx->opcode, 6, 4);
s12 = extract32(ctx->opcode, 10, 10);
sel = extract32(ctx->opcode, 20, 1);
Rb = extract32(ctx->opcode, 21, 5);
gen_load_gpr(t0, Rb);
tcg_gen_movi_tl(t1, s12);
tcg_gen_shli_tl(t1, t1, 2);
if (s12 & 0x200) {
tcg_gen_ori_tl(t1, t1, 0xFFFFF000);
}
tcg_gen_add_tl(t1, t0, t1);
tcg_gen_qemu_ld_tl(t1, t1, ctx->mem_idx, MO_SL);
if (sel == 1) {
/* S32LDDR */
tcg_gen_bswap32_tl(t1, t1);
}
gen_store_mxu_gpr(t1, XRa);
tcg_temp_free(t0);
tcg_temp_free(t1);
}
/*
* MXU instruction category: logic
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32NOR S32AND S32OR S32XOR
*/
/*
* S32NOR XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'nor' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32NOR(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 1s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0xFFFFFFFF);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just set destination to the negation of XRc */
tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
} else if (unlikely(XRc == 0)) {
/* XRa zero register -> just set destination to the negation of XRb */
tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to the negation of XRb */
tcg_gen_not_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
tcg_gen_nor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* S32AND XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'and' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32AND(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) || (XRc == 0))) {
/* one of operands zero register -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
tcg_gen_and_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* S32OR XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'or' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32OR(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just set destination to the content of XRc */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
} else if (unlikely(XRc == 0)) {
/* XRc zero register -> just set destination to the content of XRb */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* S32XOR XRa, XRb, XRc
* Update XRa with the result of logical bitwise 'xor' operation
* applied to the content of XRb and XRc.
*/
static void gen_mxu_S32XOR(DisasContext *ctx)
{
uint32_t pad, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just set destination to the content of XRc */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
} else if (unlikely(XRc == 0)) {
/* XRc zero register -> just set destination to the content of XRb */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else {
/* the most general case */
tcg_gen_xor_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], mxu_gpr[XRc - 1]);
}
}
/*
* MXU instruction category max/min
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32MAX D16MAX Q8MAX
* S32MIN D16MIN Q8MIN
*/
/*
* S32MAX XRa, XRb, XRc
* Update XRa with the maximum of signed 32-bit integers contained
* in XRb and XRc.
*
* S32MIN XRa, XRb, XRc
* Update XRa with the minimum of signed 32-bit integers contained
* in XRb and XRc.
*/
static void gen_mxu_S32MAX_S32MIN(DisasContext *ctx)
{
uint32_t pad, opc, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
opc = extract32(ctx->opcode, 18, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely((XRb == 0) || (XRc == 0))) {
/* exactly one operand is zero register - find which one is not...*/
uint32_t XRx = XRb ? XRb : XRc;
/* ...and do max/min operation with one operand 0 */
if (opc == OPC_MXU_S32MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRx - 1], 0);
}
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
if (opc == OPC_MXU_S32MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
mxu_gpr[XRc - 1]);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1],
mxu_gpr[XRc - 1]);
}
}
}
/*
* D16MAX
* Update XRa with the 16-bit-wise maximums of signed integers
* contained in XRb and XRc.
*
* D16MIN
* Update XRa with the 16-bit-wise minimums of signed integers
* contained in XRb and XRc.
*/
static void gen_mxu_D16MAX_D16MIN(DisasContext *ctx)
{
uint32_t pad, opc, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
opc = extract32(ctx->opcode, 18, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRc == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRa == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_i32(mxu_gpr[XRc - 1], 0);
} else if (unlikely((XRb == 0) || (XRa == 0))) {
/* exactly one operand is zero register - find which one is not...*/
uint32_t XRx = XRb ? XRb : XRc;
/* ...and do half-word-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_const_i32(0);
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
}
/* the right half-word */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0x0000FFFF);
/* move half-words to the leftmost position */
tcg_gen_shli_i32(t0, t0, 16);
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
tcg_temp_free(t1);
tcg_temp_free(t0);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
/* the left half-word first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFFFF0000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
}
/* the right half-word */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0x0000FFFF);
/* move half-words to the leftmost position */
tcg_gen_shli_i32(t0, t0, 16);
tcg_gen_shli_i32(t1, t1, 16);
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_D16MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting half-words to its original position */
tcg_gen_shri_i32(t0, t0, 16);
/* finally update the destination */
tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
tcg_temp_free(t1);
tcg_temp_free(t0);
}
}
/*
* Q8MAX
* Update XRa with the 8-bit-wise maximums of signed integers
* contained in XRb and XRc.
*
* Q8MIN
* Update XRa with the 8-bit-wise minimums of signed integers
* contained in XRb and XRc.
*/
static void gen_mxu_Q8MAX_Q8MIN(DisasContext *ctx)
{
uint32_t pad, opc, XRc, XRb, XRa;
pad = extract32(ctx->opcode, 21, 5);
opc = extract32(ctx->opcode, 18, 3);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to zero */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely((XRb == 0) || (XRc == 0))) {
/* exactly one operand is zero register - make it be the first...*/
uint32_t XRx = XRb ? XRb : XRc;
/* ...and do byte-wise max/min with one operand 0 */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_const_i32(0);
int32_t i;
/* the leftmost byte (byte 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
}
/* bytes 2, 1, 0 */
for (i = 2; i >= 0; i--) {
/* extract the byte */
tcg_gen_andi_i32(t0, mxu_gpr[XRx - 1], 0xFF << (8 * i));
/* move the byte to the leftmost position */
tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
}
tcg_temp_free(t1);
tcg_temp_free(t0);
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just set destination to one of them */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
} else {
/* the most general case */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
int32_t i;
/* the leftmost bytes (bytes 3) first */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF000000);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(mxu_gpr[XRa - 1], t0, t1);
} else {
tcg_gen_smin_i32(mxu_gpr[XRa - 1], t0, t1);
}
/* bytes 2, 1, 0 */
for (i = 2; i >= 0; i--) {
/* extract corresponding bytes */
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0xFF << (8 * i));
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF << (8 * i));
/* move the bytes to the leftmost position */
tcg_gen_shli_i32(t0, t0, 8 * (3 - i));
tcg_gen_shli_i32(t1, t1, 8 * (3 - i));
/* t0 will be max/min of t0 and t1 */
if (opc == OPC_MXU_Q8MAX) {
tcg_gen_smax_i32(t0, t0, t1);
} else {
tcg_gen_smin_i32(t0, t0, t1);
}
/* return resulting byte to its original position */
tcg_gen_shri_i32(t0, t0, 8 * (3 - i));
/* finally update the destination */
tcg_gen_or_i32(mxu_gpr[XRa - 1], mxu_gpr[XRa - 1], t0);
}
tcg_temp_free(t1);
tcg_temp_free(t0);
}
}
/*
* MXU instruction category: align
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*
* S32ALN S32ALNI
*/
/*
* S32ALNI XRc, XRb, XRa, optn3
* Arrange bytes from XRb and XRc according to one of five sets of
* rules determined by optn3, and place the result in XRa.
*/
static void gen_mxu_S32ALNI(DisasContext *ctx)
{
uint32_t optn3, pad, XRc, XRb, XRa;
optn3 = extract32(ctx->opcode, 23, 3);
pad = extract32(ctx->opcode, 21, 2);
XRc = extract32(ctx->opcode, 14, 4);
XRb = extract32(ctx->opcode, 10, 4);
XRa = extract32(ctx->opcode, 6, 4);
if (unlikely(pad != 0)) {
/* opcode padding incorrect -> do nothing */
} else if (unlikely(XRa == 0)) {
/* destination is zero register -> do nothing */
} else if (unlikely((XRb == 0) && (XRc == 0))) {
/* both operands zero registers -> just set destination to all 0s */
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
} else if (unlikely(XRb == 0)) {
/* XRb zero register -> just appropriatelly shift XRc into XRa */
switch (optn3) {
case MXU_OPTN3_PTN0:
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
break;
case MXU_OPTN3_PTN1:
case MXU_OPTN3_PTN2:
case MXU_OPTN3_PTN3:
tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1],
8 * (4 - optn3));
break;
case MXU_OPTN3_PTN4:
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
break;
}
} else if (unlikely(XRc == 0)) {
/* XRc zero register -> just appropriatelly shift XRb into XRa */
switch (optn3) {
case MXU_OPTN3_PTN0:
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
break;
case MXU_OPTN3_PTN1:
case MXU_OPTN3_PTN2:
case MXU_OPTN3_PTN3:
tcg_gen_shri_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
break;
case MXU_OPTN3_PTN4:
tcg_gen_movi_i32(mxu_gpr[XRa - 1], 0);
break;
}
} else if (unlikely(XRb == XRc)) {
/* both operands same -> just rotation or moving from any of them */
switch (optn3) {
case MXU_OPTN3_PTN0:
case MXU_OPTN3_PTN4:
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
break;
case MXU_OPTN3_PTN1:
case MXU_OPTN3_PTN2:
case MXU_OPTN3_PTN3:
tcg_gen_rotli_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1], 8 * optn3);
break;
}
} else {
/* the most general case */
switch (optn3) {
case MXU_OPTN3_PTN0:
{
/* */
/* XRb XRc */
/* +---------------+ */
/* | A B C D | E F G H */
/* +-------+-------+ */
/* | */
/* XRa */
/* */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRb - 1]);
}
break;
case MXU_OPTN3_PTN1:
{
/* */
/* XRb XRc */
/* +-------------------+ */
/* A | B C D E | F G H */
/* +---------+---------+ */
/* | */
/* XRa */
/* */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x00FFFFFF);
tcg_gen_shli_i32(t0, t0, 8);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFF000000);
tcg_gen_shri_i32(t1, t1, 24);
tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
tcg_temp_free(t1);
tcg_temp_free(t0);
}
break;
case MXU_OPTN3_PTN2:
{
/* */
/* XRb XRc */
/* +-------------------+ */
/* A B | C D E F | G H */
/* +---------+---------+ */
/* | */
/* XRa */
/* */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x0000FFFF);
tcg_gen_shli_i32(t0, t0, 16);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFF0000);
tcg_gen_shri_i32(t1, t1, 16);
tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
tcg_temp_free(t1);
tcg_temp_free(t0);
}
break;
case MXU_OPTN3_PTN3:
{
/* */
/* XRb XRc */
/* +-------------------+ */
/* A B C | D E F G | H */
/* +---------+---------+ */
/* | */
/* XRa */
/* */
TCGv_i32 t0 = tcg_temp_new();
TCGv_i32 t1 = tcg_temp_new();
tcg_gen_andi_i32(t0, mxu_gpr[XRb - 1], 0x000000FF);
tcg_gen_shli_i32(t0, t0, 24);
tcg_gen_andi_i32(t1, mxu_gpr[XRc - 1], 0xFFFFFF00);
tcg_gen_shri_i32(t1, t1, 8);
tcg_gen_or_i32(mxu_gpr[XRa - 1], t0, t1);
tcg_temp_free(t1);
tcg_temp_free(t0);
}
break;
case MXU_OPTN3_PTN4:
{
/* */
/* XRb XRc */
/* +---------------+ */
/* A B C D | E F G H | */
/* +-------+-------+ */
/* | */
/* XRa */
/* */
tcg_gen_mov_i32(mxu_gpr[XRa - 1], mxu_gpr[XRc - 1]);
}
break;
}
}
}
/*
* Decoding engine for MXU
* =======================
*/
static void decode_opc_mxu__pool00(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_S32MAX:
case OPC_MXU_S32MIN:
gen_mxu_S32MAX_S32MIN(ctx);
break;
case OPC_MXU_D16MAX:
case OPC_MXU_D16MIN:
gen_mxu_D16MAX_D16MIN(ctx);
break;
case OPC_MXU_Q8MAX:
case OPC_MXU_Q8MIN:
gen_mxu_Q8MAX_Q8MIN(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool04(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 20, 1);
switch (opcode) {
case OPC_MXU_S32LDD:
case OPC_MXU_S32LDDR:
gen_mxu_s32ldd_s32lddr(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool16(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 18, 3);
switch (opcode) {
case OPC_MXU_S32ALNI:
gen_mxu_S32ALNI(ctx);
break;
case OPC_MXU_S32NOR:
gen_mxu_S32NOR(ctx);
break;
case OPC_MXU_S32AND:
gen_mxu_S32AND(ctx);
break;
case OPC_MXU_S32OR:
gen_mxu_S32OR(ctx);
break;
case OPC_MXU_S32XOR:
gen_mxu_S32XOR(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
static void decode_opc_mxu__pool19(DisasContext *ctx)
{
uint32_t opcode = extract32(ctx->opcode, 22, 2);
switch (opcode) {
case OPC_MXU_Q8MUL:
case OPC_MXU_Q8MULSU:
gen_mxu_q8mul_q8mulsu(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
break;
}
}
bool decode_ase_mxu(DisasContext *ctx, uint32_t insn)
{
uint32_t opcode = extract32(insn, 0, 6);
if (opcode == OPC_MXU_S32M2I) {
gen_mxu_s32m2i(ctx);
return true;
}
if (opcode == OPC_MXU_S32I2M) {
gen_mxu_s32i2m(ctx);
return true;
}
{
TCGv t_mxu_cr = tcg_temp_new();
TCGLabel *l_exit = gen_new_label();
gen_load_mxu_cr(t_mxu_cr);
tcg_gen_andi_tl(t_mxu_cr, t_mxu_cr, MXU_CR_MXU_EN);
tcg_gen_brcondi_tl(TCG_COND_NE, t_mxu_cr, MXU_CR_MXU_EN, l_exit);
switch (opcode) {
case OPC_MXU__POOL00:
decode_opc_mxu__pool00(ctx);
break;
case OPC_MXU_D16MUL:
gen_mxu_d16mul(ctx);
break;
case OPC_MXU_D16MAC:
gen_mxu_d16mac(ctx);
break;
case OPC_MXU__POOL04:
decode_opc_mxu__pool04(ctx);
break;
case OPC_MXU_S8LDD:
gen_mxu_s8ldd(ctx);
break;
case OPC_MXU__POOL16:
decode_opc_mxu__pool16(ctx);
break;
case OPC_MXU__POOL19:
decode_opc_mxu__pool19(ctx);
break;
default:
MIPS_INVAL("decode_opc_mxu");
gen_reserved_instruction(ctx);
}
gen_set_label(l_exit);
tcg_temp_free(t_mxu_cr);
}
return true;
}