326 lines
9.3 KiB
C
326 lines
9.3 KiB
C
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "memory.h"
|
|
#include "object.h"
|
|
#include "value.h"
|
|
#include "vm.h"
|
|
#include "table.h"
|
|
|
|
/* Allocate storage for one object of C type `type`, tag it with `objectType`
 * through allocateObject(), and yield the result already cast to `type *`. */
#define ALLOCATE_OBJECT(type, objectType) \
    ((type *)allocateObject(sizeof(type), (objectType)))
|
|
|
|
/**
 * Obtain `size` bytes from krk_reallocate() and fill in the common object
 * header: the type tag, a zeroed mark flag, and the link that places the new
 * object at the head of the vm.objects list.
 *
 * Only the header fields are written here; everything past them is left for
 * the caller to initialize.
 */
static KrkObj * allocateObject(size_t size, ObjType type) {
    KrkObj * fresh = (KrkObj*)krk_reallocate(NULL, 0, size);

    /* Header fields. */
    fresh->isMarked = 0;
    fresh->type = type;

    /* Prepend to the VM's list of objects. */
    KrkObj * previousHead = vm.objects;
    fresh->next = previousHead;
    vm.objects = fresh;

    return fresh;
}
|
|
|
|
/**
 * Encode `value` as a UTF-8 style byte sequence into `out`.
 *
 * The sequence length is chosen from the magnitude of `value`:
 *   value <= 0x7F    -> 1 byte
 *   value <= 0x7FF   -> 2 bytes (lead 0xC0)
 *   value <= 0xFFFF  -> 3 bytes (lead 0xE0)
 *   anything larger  -> 4 bytes (lead 0xF0)
 *
 * No range validation is performed, and `out` must have room for up to four
 * bytes. Returns the number of bytes written.
 */
size_t krk_codepointToBytes(krk_integer_type value, unsigned char * out) {
    size_t total;
    int leadBits;

    if (value <= 0x7F) {
        total = 1;
        leadBits = 0x00;
    } else if (value <= 0x7FF) {
        total = 2;
        leadBits = 0xC0;
    } else if (value <= 0xFFFF) {
        total = 3;
        leadBits = 0xE0;
    } else {
        total = 4;
        leadBits = 0xF0;
    }

    /* Emit continuation bytes from last to first, six payload bits each. */
    for (size_t i = total - 1; i > 0; --i) {
        out[i] = (0x80 | (value & 0x3F));
        value >>= 6;
    }

    /* Whatever is left of the value goes into the lead byte. */
    out[0] = (unsigned char)(leadBits | value);
    return total;
}
|
|
|
|
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/**
 * Feed one byte to an incremental UTF-8 decoder.
 *
 * @param state  Decoder state, updated in place. UTF8_ACCEPT (0) means a
 *               codepoint boundary, UTF8_REJECT (1) means an invalid byte was
 *               seen, and 2..4 mean that 1..3 continuation bytes are still
 *               expected. The reject state is sticky: the caller must reset
 *               `*state` to resume decoding.
 * @param codep  Accumulator for the codepoint being assembled; it holds the
 *               finished codepoint whenever the function returns UTF8_ACCEPT.
 * @param byte   Next input byte (0..0xFF).
 * @return       The new value of `*state`.
 *
 * A `byte` above 0xFF or a `*state` above 4 cannot index the lookup tables
 * and is treated as a rejection rather than read out of bounds.
 */
static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
    /* State entered after a lead byte, indexed by the byte's top five bits. */
    static const uint8_t state_table[32] = {
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xxxxxxx */
        1,1,1,1,1,1,1,1,                 /* 10xxxxxx */
        2,2,2,2,                         /* 110xxxxx */
        3,3,                             /* 1110xxxx */
        4,                               /* 11110xxx */
        1                                /* 11111xxx */
    };

    /* Payload bits carried by a lead byte, indexed the same way. */
    static const uint8_t mask_bytes[32] = {
        0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
        0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
        0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
        0x1F,0x1F,0x1F,0x1F,
        0x0F,0x0F,
        0x07,
        0x00
    };

    /* State transition taken after consuming one continuation byte. */
    static const uint8_t next[5] = {
        0,
        1,
        0,
        2,
        3
    };

    /* Guard the table lookups below against out-of-range inputs. */
    if (byte > 0xFF || *state > 4) goto reject;

    if (*state == UTF8_ACCEPT) {
        /* Continuation bytes (0x80-0xBF) and the leads 0xC0/0xC1 may not
         * start a sequence. */
        if (byte >= 0x80 && byte <= 0xC1) goto reject;
        *codep = byte & mask_bytes[byte >> 3];
        *state = state_table[byte >> 3];
    } else {
        /* Mid-sequence (or already rejected): only 10xxxxxx is acceptable. */
        if (byte < 0x80 || byte >= 0xC0) goto reject;
        *codep = (byte & 0x3F) | (*codep << 6);
        *state = next[*state];
    }
    return *state;

reject:
    *state = UTF8_REJECT;
    return *state;
}
|
|
|
|
static int checkString(const char * chars, size_t length, size_t *codepointCount) {
|
|
uint32_t state = 0;
|
|
uint32_t codepoint = 0;
|
|
unsigned char * end = (unsigned char *)chars + length;
|
|
uint32_t maxCodepoint = 0;
|
|
for (unsigned char * c = (unsigned char *)chars; c < end; ++c) {
|
|
if (!decode(&state, &codepoint, *c)) {
|
|
if (codepoint > maxCodepoint) maxCodepoint = codepoint;
|
|
(*codepointCount)++;
|
|
} else if (state == UTF8_REJECT) {
|
|
krk_runtimeError(vm.exceptions.valueError, "Invalid UTF-8 sequence in string.");
|
|
fprintf(stderr, "Invalid sequence detected.\n");
|
|
*codepointCount = 0;
|
|
return KRK_STRING_ASCII;
|
|
}
|
|
}
|
|
if (maxCodepoint > 0xFFFF) {
|
|
return KRK_STRING_UCS4;
|
|
} else if (maxCodepoint > 0xFF) {
|
|
return KRK_STRING_UCS2;
|
|
} else if (maxCodepoint > 0x7F) {
|
|
return KRK_STRING_UCS1;
|
|
} else {
|
|
return KRK_STRING_ASCII;
|
|
}
|
|
}
|
|
|
|
#define GENREADY(size,type) \
|
|
static void _readyUCS ## size (KrkString * string) { \
|
|
uint32_t state = 0; \
|
|
uint32_t codepoint = 0; \
|
|
unsigned char * end = (unsigned char *)string->chars + string->length; \
|
|
string->codes = malloc(sizeof(type) * string->codesLength); \
|
|
type *outPtr = (type *)string->codes; \
|
|
for (unsigned char * c = (unsigned char *)string->chars; c < end; ++c) { \
|
|
if (!decode(&state, &codepoint, *c)) { \
|
|
*(outPtr++) = (type)codepoint; \
|
|
} else if (state == UTF8_REJECT) { \
|
|
state = 0; \
|
|
} \
|
|
} \
|
|
}
|
|
GENREADY(1,uint8_t)
|
|
GENREADY(2,uint16_t)
|
|
GENREADY(4,uint32_t)
|
|
#undef GENREADY
|
|
|
|
/**
 * Return string->codes, building it first if it has not been set yet.
 *
 * The builder is selected by string->type (UCS1, UCS2 or UCS4). Any other
 * type with a NULL `codes` raises a valueError through krk_runtimeError(),
 * and the still-NULL pointer is returned.
 */
void * krk_unicodeString(KrkString * string) {
    if (!string->codes) {
        switch (string->type) {
            case KRK_STRING_UCS1:
                _readyUCS1(string);
                break;
            case KRK_STRING_UCS2:
                _readyUCS2(string);
                break;
            case KRK_STRING_UCS4:
                _readyUCS4(string);
                break;
            default:
                krk_runtimeError(vm.exceptions.valueError, "Internal string error.");
                break;
        }
    }
    return string->codes;
}
|
|
|
|
/**
 * Return the codepoint stored at position `index` of `string`.
 *
 * krk_unicodeString() is called first so that string->codes is populated
 * for the non-ASCII representations. `index` is not range-checked here.
 *
 * If string->type matches none of the known representations, a valueError
 * is raised through krk_runtimeError() and 0 is returned.
 */
uint32_t krk_unicodeCodepoint(KrkString * string, size_t index) {
    krk_unicodeString(string);
    switch (string->type) {
        /* Go through unsigned char: a plain `char` may be signed, and a byte
         * >= 0x80 would otherwise be sign-extended into the uint32_t result.
         * Such bytes can occur because checkString() reports
         * KRK_STRING_ASCII for input it rejects. */
        case KRK_STRING_ASCII: return (unsigned char)string->chars[index];
        case KRK_STRING_UCS1: return ((uint8_t*)string->codes)[index];
        case KRK_STRING_UCS2: return ((uint16_t*)string->codes)[index];
        case KRK_STRING_UCS4: return ((uint32_t*)string->codes)[index];
    }
    krk_runtimeError(vm.exceptions.valueError, "Invalid string.");
    return 0;
}
|
|
|
|
static KrkString * allocateString(char * chars, size_t length, uint32_t hash) {
|
|
KrkString * string = ALLOCATE_OBJECT(KrkString, OBJ_STRING);
|
|
string->length = length;
|
|
string->chars = chars;
|
|
string->hash = hash;
|
|
string->codesLength = 0;
|
|
string->type = checkString(chars,length,&string->codesLength);
|
|
string->codes = NULL;
|
|
if (string->type == KRK_STRING_ASCII) string->codes = string->chars;
|
|
krk_push(OBJECT_VAL(string));
|
|
krk_tableSet(&vm.strings, OBJECT_VAL(string), NONE_VAL());
|
|
krk_pop();
|
|
return string;
|
|
}
|
|
|
|
/**
 * Hash `length` bytes starting at `key`.
 *
 * This is the so-called "sdbm" hash, which comes from a piece of public
 * domain code from a clone of ndbm. Each step computes
 *     hash = byte + (hash << 6) + (hash << 16) - hash
 * in 32-bit unsigned arithmetic. An empty input hashes to 0.
 */
static uint32_t hashString(const char * key, size_t length) {
    uint32_t acc = 0;
    const char * p = key;
    size_t remaining = length;

    while (remaining != 0) {
        uint32_t shiftedBy6 = acc << 6;
        uint32_t shiftedBy16 = acc << 16;
        acc = (int)*p + shiftedBy6 + shiftedBy16 - acc;
        ++p;
        --remaining;
    }

    return acc;
}
|
|
|
|
KrkString * krk_takeString(char * chars, size_t length) {
|
|
uint32_t hash = hashString(chars, length);
|
|
KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
|
|
if (interned != NULL) {
|
|
FREE_ARRAY(char, chars, length + 1);
|
|
return interned;
|
|
}
|
|
return allocateString(chars, length, hash);
|
|
}
|
|
|
|
KrkString * krk_copyString(const char * chars, size_t length) {
|
|
uint32_t hash = hashString(chars, length);
|
|
KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
|
|
if (interned) return interned;
|
|
char * heapChars = ALLOCATE(char, length + 1);
|
|
memcpy(heapChars, chars, length);
|
|
heapChars[length] = '\0';
|
|
return allocateString(heapChars, length, hash);
|
|
}
|
|
|
|
KrkFunction * krk_newFunction(void) {
|
|
KrkFunction * function = ALLOCATE_OBJECT(KrkFunction, OBJ_FUNCTION);
|
|
function->requiredArgs = 0;
|
|
function->keywordArgs = 0;
|
|
function->upvalueCount = 0;
|
|
function->name = NULL;
|
|
function->docstring = NULL;
|
|
function->collectsArguments = 0;
|
|
function->collectsKeywords = 0;
|
|
function->localNameCount = 0;
|
|
function->localNames = NULL;
|
|
function->globalsContext = NULL;
|
|
krk_initValueArray(&function->requiredArgNames);
|
|
krk_initValueArray(&function->keywordArgNames);
|
|
krk_initChunk(&function->chunk);
|
|
return function;
|
|
}
|
|
|
|
/**
 * Allocate an OBJ_NATIVE object wrapping the C function `function`.
 *
 * `name` is stored as given (the pointer is kept, not copied) and `type` is
 * stored in the object's `isMethod` field.
 */
KrkNative * krk_newNative(NativeFn function, const char * name, int type) {
    KrkNative * wrapper = ALLOCATE_OBJECT(KrkNative, OBJ_NATIVE);

    wrapper->name = name;
    wrapper->isMethod = type;
    wrapper->function = function;

    return wrapper;
}
|
|
|
|
/**
 * Allocate an OBJ_CLOSURE object for `function`.
 *
 * An array of function->upvalueCount upvalue pointers is allocated first and
 * every slot is set to NULL; the closure then records the function, that
 * array, and the upvalue count.
 */
KrkClosure * krk_newClosure(KrkFunction * function) {
    KrkUpvalue ** slots = ALLOCATE(KrkUpvalue*, function->upvalueCount);

    /* Clear every slot; the fill order is irrelevant. */
    size_t remaining = function->upvalueCount;
    while (remaining > 0) {
        --remaining;
        slots[remaining] = NULL;
    }

    KrkClosure * result = ALLOCATE_OBJECT(KrkClosure, OBJ_CLOSURE);
    result->upvalueCount = function->upvalueCount;
    result->upvalues = slots;
    result->function = function;

    return result;
}
|
|
|
|
KrkUpvalue * krk_newUpvalue(int slot) {
|
|
KrkUpvalue * upvalue = ALLOCATE_OBJECT(KrkUpvalue, OBJ_UPVALUE);
|
|
upvalue->location = slot;
|
|
upvalue->next = NULL;
|
|
upvalue->closed = NONE_VAL();
|
|
return upvalue;
|
|
}
|
|
|
|
/**
 * Allocate an OBJ_CLASS object named `name`.
 *
 * The class starts with no filename, docstring or base, with empty `methods`
 * and `fields` tables, and with every special-method slot set to NULL.
 */
KrkClass * krk_newClass(KrkString * name) {
    KrkClass * cls = ALLOCATE_OBJECT(KrkClass, OBJ_CLASS);

    /* Identity and inheritance. */
    cls->name = name;
    cls->filename = NULL;
    cls->docstring = NULL;
    cls->base = NULL;

    /* Member tables. */
    krk_initTable(&cls->methods);
    krk_initTable(&cls->fields);

    /* Special-method slots, all unset for now. */
    cls->_init = NULL;
    cls->_call = NULL;
    cls->_eq = NULL;
    cls->_len = NULL;
    cls->_iter = NULL;
    cls->_dir = NULL;

    cls->_reprer = NULL;
    cls->_tostr = NULL;

    cls->_getter = NULL;
    cls->_setter = NULL;
    cls->_slicer = NULL;
    cls->_delitem = NULL;
    cls->_getattr = NULL;

    cls->_enter = NULL;
    cls->_exit = NULL;

    return cls;
}
|
|
|
|
/**
 * Allocate an OBJ_INSTANCE object of class `_class`.
 *
 * The instance's field table is initialized and then filled from the class's
 * `fields` table through krk_tableAddAll(). The instance sits on the VM
 * stack for the duration of that call.
 */
KrkInstance * krk_newInstance(KrkClass * _class) {
    KrkInstance * created = ALLOCATE_OBJECT(KrkInstance, OBJ_INSTANCE);
    created->_class = _class;
    krk_initTable(&created->fields);

    /* Populate the instance's fields from the class's field table. */
    krk_push(OBJECT_VAL(created));
    krk_tableAddAll(&_class->fields, &created->fields);
    krk_pop();

    /* To be used by C-defined types to track internal objects. */
    created->_internal = NULL;

    return created;
}
|
|
|
|
/**
 * Allocate an OBJ_BOUND_METHOD object that pairs `receiver` with `method`.
 */
KrkBoundMethod * krk_newBoundMethod(KrkValue receiver, KrkObj * method) {
    KrkBoundMethod * pairing = ALLOCATE_OBJECT(KrkBoundMethod, OBJ_BOUND_METHOD);

    pairing->method = method;
    pairing->receiver = receiver;

    return pairing;
}
|
|
|
|
KrkTuple * krk_newTuple(size_t length) {
|
|
KrkTuple * tuple = ALLOCATE_OBJECT(KrkTuple, OBJ_TUPLE);
|
|
tuple->inrepr = 0;
|
|
krk_initValueArray(&tuple->values);
|
|
krk_push(OBJECT_VAL(tuple));
|
|
tuple->values.capacity = length;
|
|
tuple->values.values = GROW_ARRAY(KrkValue,NULL,0,length);
|
|
krk_pop();
|
|
return tuple;
|
|
}
|
|
|
|
/**
 * Recompute bytes->hash from the object's current contents, using
 * hashString() over bytes->length bytes of bytes->bytes.
 */
void krk_bytesUpdateHash(KrkBytes * bytes) {
    const char * raw = (char*)bytes->bytes;
    size_t count = bytes->length;

    bytes->hash = hashString(raw, count);
}
|
|
|
|
KrkBytes * krk_newBytes(size_t length, uint8_t * source) {
|
|
KrkBytes * bytes = ALLOCATE_OBJECT(KrkBytes, OBJ_BYTES);
|
|
bytes->length = length;
|
|
bytes->bytes = NULL;
|
|
krk_push(OBJECT_VAL(bytes));
|
|
bytes->bytes = ALLOCATE(uint8_t, length);
|
|
bytes->hash = -1;
|
|
if (source) {
|
|
memcpy(bytes->bytes, source, length);
|
|
krk_bytesUpdateHash(bytes);
|
|
}
|
|
krk_pop();
|
|
return bytes;
|
|
}
|
|
|