tcg: Vary the allocation size for TCGOp
We have been allocating a worst case number of arguments to support calls. Instead, allow the size to vary. By default leave space for 4 args, to maximize reuse, but allow calls to increase the number of args to 32. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> [PMD: Split patch in two] Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org> Message-Id: <20221218211832.73312-3-philmd@linaro.org>
This commit is contained in:
parent
d44789434b
commit
cb10bc63b7
@ -258,12 +258,12 @@ static TCGOp *rm_ops(TCGOp *op)
|
||||
|
||||
static TCGOp *copy_op_nocheck(TCGOp **begin_op, TCGOp *op)
|
||||
{
|
||||
unsigned nargs = ARRAY_SIZE(op->args);
|
||||
TCGOp *old_op = QTAILQ_NEXT(*begin_op, link);
|
||||
unsigned nargs = old_op->nargs;
|
||||
|
||||
*begin_op = QTAILQ_NEXT(*begin_op, link);
|
||||
tcg_debug_assert(*begin_op);
|
||||
op = tcg_op_insert_after(tcg_ctx, op, (*begin_op)->opc, nargs);
|
||||
memcpy(op->args, (*begin_op)->args, sizeof(op->args));
|
||||
*begin_op = old_op;
|
||||
op = tcg_op_insert_after(tcg_ctx, op, old_op->opc, nargs);
|
||||
memcpy(op->args, old_op->args, sizeof(op->args[0]) * nargs);
|
||||
|
||||
return op;
|
||||
}
|
||||
|
@ -133,6 +133,4 @@
|
||||
#define DEF_HELPER_7(name, ret, t1, t2, t3, t4, t5, t6, t7) \
|
||||
DEF_HELPER_FLAGS_7(name, 0, ret, t1, t2, t3, t4, t5, t6, t7)
|
||||
|
||||
/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
|
||||
|
||||
#endif /* EXEC_HELPER_HEAD_H */
|
||||
|
@ -38,20 +38,6 @@
|
||||
/* XXX: make safe guess about sizes */
|
||||
#define MAX_OP_PER_INSTR 266
|
||||
|
||||
#if HOST_LONG_BITS == 32
|
||||
#define MAX_OPC_PARAM_PER_ARG 2
|
||||
#else
|
||||
#define MAX_OPC_PARAM_PER_ARG 1
|
||||
#endif
|
||||
#define MAX_OPC_PARAM_IARGS 7
|
||||
#define MAX_OPC_PARAM_OARGS 1
|
||||
#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
|
||||
|
||||
/* A Call op needs up to 4 + 2N parameters on 32-bit archs,
|
||||
* and up to 4 + N parameters on 64-bit archs
|
||||
* (N = number of input arguments + output arguments). */
|
||||
#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
|
||||
|
||||
#define CPU_TEMP_BUF_NLONGS 128
|
||||
#define TCG_STATIC_FRAME_SIZE (CPU_TEMP_BUF_NLONGS * sizeof(long))
|
||||
|
||||
@ -493,34 +479,34 @@ typedef struct TCGTempSet {
|
||||
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
|
||||
} TCGTempSet;
|
||||
|
||||
/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
|
||||
this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
|
||||
There are never more than 2 outputs, which means that we can store all
|
||||
dead + sync data within 16 bits. */
|
||||
#define DEAD_ARG 4
|
||||
#define SYNC_ARG 1
|
||||
typedef uint16_t TCGLifeData;
|
||||
/*
|
||||
* With 1 128-bit output, a 32-bit host requires 4 output parameters,
|
||||
* which leaves a maximum of 28 other slots. Which is enough for 7
|
||||
* 128-bit operands.
|
||||
*/
|
||||
#define DEAD_ARG (1 << 4)
|
||||
#define SYNC_ARG (1 << 0)
|
||||
typedef uint32_t TCGLifeData;
|
||||
|
||||
/* The layout here is designed to avoid a bitfield crossing of
|
||||
a 32-bit boundary, which would cause GCC to add extra padding. */
|
||||
typedef struct TCGOp {
|
||||
TCGOpcode opc : 8; /* 8 */
|
||||
TCGOpcode opc : 8;
|
||||
unsigned nargs : 8;
|
||||
|
||||
/* Parameters for this opcode. See below. */
|
||||
unsigned param1 : 4; /* 12 */
|
||||
unsigned param2 : 4; /* 16 */
|
||||
unsigned param1 : 8;
|
||||
unsigned param2 : 8;
|
||||
|
||||
/* Lifetime data of the operands. */
|
||||
unsigned life : 16; /* 32 */
|
||||
TCGLifeData life;
|
||||
|
||||
/* Next and previous opcodes. */
|
||||
QTAILQ_ENTRY(TCGOp) link;
|
||||
|
||||
/* Arguments for the opcode. */
|
||||
TCGArg args[MAX_OPC_PARAM];
|
||||
|
||||
/* Register preferences for the output(s). */
|
||||
TCGRegSet output_pref[2];
|
||||
|
||||
/* Arguments for the opcode. */
|
||||
TCGArg args[];
|
||||
} TCGOp;
|
||||
|
||||
#define TCGOP_CALLI(X) (X)->param1
|
||||
|
35
tcg/tcg.c
35
tcg/tcg.c
@ -1513,7 +1513,12 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
|
||||
}
|
||||
}
|
||||
|
||||
max_args = ARRAY_SIZE(op->args);
|
||||
/*
|
||||
* A Call op needs up to 4 + 2N parameters on 32-bit archs,
|
||||
* and up to 4 + N parameters on 64-bit archs
|
||||
* (N = number of input arguments + output arguments).
|
||||
*/
|
||||
max_args = (64 / TCG_TARGET_REG_BITS) * nargs + 4;
|
||||
op = tcg_emit_op(INDEX_op_call, max_args);
|
||||
|
||||
pi = 0;
|
||||
@ -2298,19 +2303,31 @@ void tcg_remove_ops_after(TCGOp *op)
|
||||
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
|
||||
{
|
||||
TCGContext *s = tcg_ctx;
|
||||
TCGOp *op;
|
||||
TCGOp *op = NULL;
|
||||
|
||||
assert(nargs < ARRAY_SIZE(op->args));
|
||||
if (likely(QTAILQ_EMPTY(&s->free_ops))) {
|
||||
op = tcg_malloc(sizeof(TCGOp));
|
||||
} else {
|
||||
op = QTAILQ_FIRST(&s->free_ops);
|
||||
QTAILQ_REMOVE(&s->free_ops, op, link);
|
||||
if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
|
||||
QTAILQ_FOREACH(op, &s->free_ops, link) {
|
||||
if (nargs <= op->nargs) {
|
||||
QTAILQ_REMOVE(&s->free_ops, op, link);
|
||||
nargs = op->nargs;
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Most opcodes have 3 or 4 operands: reduce fragmentation. */
|
||||
nargs = MAX(4, nargs);
|
||||
op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
|
||||
|
||||
found:
|
||||
memset(op, 0, offsetof(TCGOp, link));
|
||||
op->opc = opc;
|
||||
s->nb_ops++;
|
||||
op->nargs = nargs;
|
||||
|
||||
/* Check for bitfield overflow. */
|
||||
tcg_debug_assert(op->nargs == nargs);
|
||||
|
||||
s->nb_ops++;
|
||||
return op;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user