From 0bd12bdf0ae357be520c43af40640af5d8183ded Mon Sep 17 00:00:00 2001 From: "K. Lange" Date: Sun, 3 Mar 2024 22:18:15 +0900 Subject: [PATCH] Support overly-long jumps with new instruction --- src/compiler.c | 40 ++++++++++++++++++++++++++++++++++-- src/debug.c | 44 ++++++++++++++++++++++++++++++++++++++++ src/kuroko/object.h | 9 ++++++++ src/memory.c | 1 + src/modules/module_dis.c | 6 ++++++ src/opcode_enum.h | 4 ++++ src/opcodes.h | 1 + src/vm.c | 18 +++++++++++++++- 8 files changed, 120 insertions(+), 3 deletions(-) diff --git a/src/compiler.c b/src/compiler.c index db3fc6a..b39d96d 100644 --- a/src/compiler.c +++ b/src/compiler.c @@ -827,9 +827,43 @@ static int _emitJump(struct GlobalState * state, uint8_t opcode) { } #define emitJump(o) _emitJump(state,o) +/** + * @brief Emit over-long jump target. + * + * Our jump instructions take only two bytes as operands, as that typically suffices + * for storing the appropriate forward or backwards offset, without needing to waste + * lots of bytes for small jumps, or recalculate everything to expand a jump to + * fit a larger offset. If we *do* get a jump offset that is too big to fit in our + * available operand space, we replace the whole instruction with one that fetches + * the desired target, slowly, from a table attached to the codeobject, alongside + * the original instruction opcode. + */ +static void _emitOverlongJump(struct GlobalState * state, int offset, int jump) { + KrkCodeObject * co = state->current->codeobject; + size_t i = 0; + while (i < co->overlongJumpsCount && co->overlongJumps[i].instructionOffset != (uint32_t)offset) i++; + if (i == co->overlongJumpsCount) { + /* Not an existing overlong jump, need to make a new one. */ + if (co->overlongJumpsCount + 1 > co->overlongJumpsCapacity) { + size_t old = co->overlongJumpsCapacity; + co->overlongJumpsCapacity = KRK_GROW_CAPACITY(old); + co->overlongJumps = KRK_GROW_ARRAY(KrkOverlongJump,co->overlongJumps,old,co->overlongJumpsCapacity); + } + co->overlongJumps[i].instructionOffset = offset; + co->overlongJumps[i].originalOpcode = currentChunk()->code[offset-1]; + co->overlongJumpsCount++; + currentChunk()->code[offset-1] = OP_OVERLONG_JUMP; + } + /* Update jump target */ + co->overlongJumps[i].intendedTarget = jump >> 16; +} + static void _patchJump(struct GlobalState * state, int offset) { int jump = currentChunk()->count - offset - 2; - if (jump > 0xFFFF) error("Jump offset is too large for opcode."); + + if (jump > 0xFFFF) { + _emitOverlongJump(state, offset, jump); + } currentChunk()->code[offset] = (jump >> 8) & 0xFF; currentChunk()->code[offset + 1] = (jump) & 0xFF; @@ -2019,7 +2053,9 @@ static void emitLoop(struct GlobalState * state, int loopStart, uint8_t loopType emitByte(loopType); int offset = currentChunk()->count - loopStart + ((loopType == OP_LOOP_ITER) ? -1 : 2); - if (offset > 0xFFFF) error("Loop jump offset is too large for opcode."); + if (offset > 0xFFFF) { + _emitOverlongJump(state, currentChunk()->count, offset); + } emitBytes(offset >> 8, offset); /* Patch break statements */ diff --git a/src/debug.c b/src/debug.c index 0ea9f31..caa09c9 100644 --- a/src/debug.c +++ b/src/debug.c @@ -209,6 +209,8 @@ static int isJumpTarget(KrkCodeObject * func, size_t startPoint) { #define JUMP(opc,sign) case opc: { uint16_t jump = (chunk->code[offset + 1] << 8) | (chunk->code[offset + 2]); \ krk_tableSet(AS_DICT(func->jumpTargets), INTEGER_VAL((size_t)(offset + 3 sign jump)), BOOLEAN_VAL(1)); \ size = 3; break; } +#define COMPLICATED(opc,more) case opc: size = 1; more; break; +#define OVERLONG_JUMP_MORE size += 2 #define CLOSURE_MORE \ KrkCodeObject * function = AS_codeobject(chunk->constants.values[constant]); \ for (size_t j = 0; j < function->upvalueCount; ++j) { \ @@ -235,6 +237,8 @@ static int isJumpTarget(KrkCodeObject * func, size_t startPoint) { #undef OPERAND #undef CONSTANT #undef JUMP +#undef COMPLICATED +#undef OVERLONG_JUMP_MORE #undef CLOSURE_MORE #undef LOCAL_MORE #undef EXPAND_ARGS_MORE @@ -284,6 +288,41 @@ static void _jump(OPARGS, int sign) { *size = 3; } +static void _complicated(OPARGS, void (*more)(OPARGS)) { + _print_opcode(OPARG_VALS); + if (more) more(OPARG_VALS); + else *size = 1; +} + +#define SIMPLE(opc) +#define JUMP(opc,sign) case opc: fprintf(f, "(%s, to %zu)", opcodeClean(#opc), *offset + 3 sign current_jump); return; +#define OPERAND(opc,more) +#define CONSTANT(opc,more) +#define COMPLICATED(opc,more) +static void _overlong_jump_more(OPARGS) { + size_t current_jump = (chunk->code[*offset + 1] << 8) | (chunk->code[*offset + 2]); + *size = 3; + + /* Now look it up */ + for (size_t i = 0; i < func->overlongJumpsCount; ++i) { + if (*offset + 1 == (size_t)func->overlongJumps[i].instructionOffset) { + current_jump |= ((size_t)func->overlongJumps[i].intendedTarget << 16); + switch (func->overlongJumps[i].originalOpcode) { +#include "opcodes.h" + default: break; + } + } + } + + fprintf(f,"(invalid destination)"); +} +#undef SIMPLE +#undef OPERAND +#undef CONSTANT +#undef JUMP +#undef COMPLICATED + + #undef NOOP #define NOOP (NULL) #define SIMPLE(opc) case opc: _simple(f,#opc,&size,&offset,func,chunk); break; @@ -292,6 +331,9 @@ static void _jump(OPARGS, int sign) { #define OPERAND(opc,more) case opc: _operand(f,#opc,&size,&offset,func,chunk,0,more); break; \ case opc ## _LONG: _operand(f,#opc "_LONG",&size,&offset,func,chunk,1,more); break; #define JUMP(opc,sign) case opc: _jump(f,#opc,&size,&offset,func,chunk,sign 1); break; +#define COMPLICATED(opc,more) case opc: _complicated(f,#opc,&size,&offset,func,chunk,more); break; + +#define OVERLONG_JUMP_MORE _overlong_jump_more #define CLOSURE_MORE _closure_more @@ -401,6 +443,8 @@ size_t krk_disassembleInstruction(FILE * f, KrkCodeObject * func, size_t offset) #undef OPERAND #undef CONSTANT #undef JUMP +#undef COMPLICATED +#undef OVERLONG_JUMP_MORE #undef CLOSURE_MORE #undef LOCAL_MORE #undef EXPAND_ARGS_MORE diff --git a/src/kuroko/object.h b/src/kuroko/object.h index a810ec6..00f0259 100644 --- a/src/kuroko/object.h +++ b/src/kuroko/object.h @@ -148,6 +148,12 @@ typedef struct { struct KrkInstance; +typedef struct { + uint32_t instructionOffset; /**< @brief Instruction (operand offset) this jump target applies to */ + uint16_t intendedTarget; /**< @brief High bytes of the intended target. */ + uint8_t originalOpcode; /**< @brief Original jump opcode to execute. */ +} KrkOverlongJump; + /** * @brief Code object. * @extends KrkObj @@ -174,6 +180,9 @@ typedef struct { size_t expressionsCount; /**< @brief Number of entries in @ref expressions */ KrkExpressionsMap * expressions; /**< @brief Mapping of bytecode offsets to expression spans for debugging */ KrkValue jumpTargets; /**< @brief Possibly a set of jump targets... */ + KrkOverlongJump * overlongJumps; /**< @brief Pessimal overlong jump container */ + size_t overlongJumpsCapacity; /**< @brief Number of possible entries in pessimal jump table */ + size_t overlongJumpsCount; /**< @brief Number of entries in pessimal jump table */ } KrkCodeObject; diff --git a/src/memory.c b/src/memory.c index fc07306..f79fe68 100644 --- a/src/memory.c +++ b/src/memory.c @@ -216,6 +216,7 @@ static void freeObject(KrkObj * object) { krk_freeValueArray(&function->keywordArgNames); KRK_FREE_ARRAY(KrkLocalEntry, function->localNames, function->localNameCount); KRK_FREE_ARRAY(KrkExpressionsMap, function->expressions, function->expressionsCapacity); + KRK_FREE_ARRAY(KrkOverlongJump, function->overlongJumps, function->overlongJumpsCapacity); function->localNameCount = 0; FREE_OBJECT(KrkCodeObject, object); break; diff --git a/src/modules/module_dis.c b/src/modules/module_dis.c index 45c461b..7f0605e 100644 --- a/src/modules/module_dis.c +++ b/src/modules/module_dis.c @@ -140,6 +140,8 @@ KRK_Function(build) { (chunk->code[offset + 2] << 8) | (chunk->code[offset + 3]); size = 4; more; break; } #define JUMP(opc,sign) case opc: { jump = 0 sign ((chunk->code[offset + 1] << 8) | (chunk->code[offset + 2])); \ size = 3; break; } +#define COMPLICATED(opc,more) case opc: size = 1; more; break; +#define OVERLONG_JUMP_MORE size = 3; jump = (chunk->code[offset + 1] << 8) | (chunk->code[offset + 2]) #define CLOSURE_MORE \ KrkCodeObject * function = AS_codeobject(chunk->constants.values[constant]); \ size_t baseOffset = offset; \ @@ -215,6 +217,8 @@ KRK_Function(examine) { #undef OPERAND #undef CONSTANT #undef JUMP +#undef COMPLICATED +#undef OVERLONG_JUMP_MORE #undef CLOSURE_MORE #undef LOCAL_MORE #undef EXPAND_ARGS_MORE @@ -327,12 +331,14 @@ KRK_Module(dis) { #define CONSTANT(opc,more) OPCODE(opc) OPCODE(opc ## _LONG) #define OPERAND(opc,more) OPCODE(opc) OPCODE(opc ## _LONG) #define JUMP(opc,sign) OPCODE(opc) +#define COMPLICATED(opc,more) OPCODE(opc) #include "opcodes.h" #undef SIMPLE #undef OPERANDB #undef OPERAND #undef CONSTANT #undef JUMP +#undef COMPLICATED if (runAs && !strcmp(runAs->chars,"__main__")) { /* Force `dis` into the module table early */ diff --git a/src/opcode_enum.h b/src/opcode_enum.h index f1c510a..484b2d5 100644 --- a/src/opcode_enum.h +++ b/src/opcode_enum.h @@ -25,16 +25,20 @@ typedef enum { #define CONSTANT(opc,more) OPCODE(opc) OPCODE(opc ## _LONG) #define OPERAND(opc,more) OPCODE(opc) OPCODE(opc ## _LONG) #define JUMP(opc,sign) OPCODE(opc) +#define COMPLICATED(opc,more) OPCODE(opc) #define CLOSURE_MORE #define EXPAND_ARGS_MORE #define FORMAT_VALUE_MORE #define LOCAL_MORE +#define OVERLONG_JUMP_MORE #include "opcodes.h" #undef SIMPLE #undef OPERANDB #undef OPERAND #undef CONSTANT #undef JUMP +#undef COMPLICATED +#undef OVERLONG_JUMP_MORE #undef CLOSURE_MORE #undef LOCAL_MORE #undef EXPAND_ARGS_MORE diff --git a/src/opcodes.h b/src/opcodes.h index 80ab28c..6d15319 100644 --- a/src/opcodes.h +++ b/src/opcodes.h @@ -122,3 +122,4 @@ SIMPLE(OP_TUPLE_FROM_LIST) OPERAND(OP_UNPACK_EX,NOOP) JUMP(OP_ENTER_EXCEPT,+) SIMPLE(OP_SWAP_POP) +COMPLICATED(OP_OVERLONG_JUMP,OVERLONG_JUMP_MORE) diff --git a/src/vm.c b/src/vm.c index 9aab832..cf12303 100644 --- a/src/vm.c +++ b/src/vm.c @@ -2156,10 +2156,11 @@ _resumeHook: (void)0; # define FALLTHROUGH #endif -#define TWO_BYTE_OPERAND { OPERAND = (frame->ip[0] << 8) | frame->ip[1]; frame->ip += 2; } +#define TWO_BYTE_OPERAND { OPERAND = OPERAND | (frame->ip[0] << 8) | frame->ip[1]; frame->ip += 2; } #define THREE_BYTE_OPERAND { OPERAND = (frame->ip[0] << 16) | (frame->ip[1] << 8); frame->ip += 2; } FALLTHROUGH #define ONE_BYTE_OPERAND { OPERAND = (OPERAND & ~0xFF) | READ_BYTE(); } +_switchEntry: (void)0; switch (opcode) { case OP_CLEANUP_WITH: { /* Top of stack is a HANDLER that should have had something loaded into it if it was still valid */ @@ -2448,6 +2449,21 @@ _finishReturn: (void)0; break; } + case OP_OVERLONG_JUMP: { + /* Overlong jumps replace 2-byte operand jump instructions with a zero-operand instruction that + * slowly scans through a dumb table to find the intended jump target and opcode. */ + for (size_t i = 0; i < frame->closure->function->overlongJumpsCount; ++i) { + if (frame->closure->function->overlongJumps[i].instructionOffset == + (size_t)((char*)frame->ip - (char*)frame->closure->function->chunk.code)) { + OPERAND = (int)frame->closure->function->overlongJumps[i].intendedTarget << 16; + opcode = frame->closure->function->overlongJumps[i].originalOpcode; + goto _switchEntry; + } + } + krk_runtimeError(vm.exceptions->valueError, "bad jump"); + goto _finishException; + } + /* * Two-byte operands */