kuroko/object.c
2021-01-19 14:05:21 +09:00

326 lines
9.3 KiB
C

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "memory.h"
#include "object.h"
#include "value.h"
#include "vm.h"
#include "table.h"
#define ALLOCATE_OBJECT(type, objectType) \
(type*)allocateObject(sizeof(type), objectType)
static KrkObj * allocateObject(size_t size, ObjType type) {
KrkObj * object = (KrkObj*)krk_reallocate(NULL, 0, size);
object->type = type;
object->isMarked = 0;
object->next = vm.objects;
vm.objects = object;
return object;
}
size_t krk_codepointToBytes(krk_integer_type value, unsigned char * out) {
if (value > 0xFFFF) {
out[0] = (0xF0 | (value >> 18));
out[1] = (0x80 | ((value >> 12) & 0x3F));
out[2] = (0x80 | ((value >> 6) & 0x3F));
out[3] = (0x80 | ((value) & 0x3F));
return 4;
} else if (value > 0x7FF) {
out[0] = (0xE0 | (value >> 12));
out[1] = (0x80 | ((value >> 6) & 0x3F));
out[2] = (0x80 | (value & 0x3F));
return 3;
} else if (value > 0x7F) {
out[0] = (0xC0 | (value >> 6));
out[1] = (0x80 | (value & 0x3F));
return 2;
} else {
out[0] = (unsigned char)value;
return 1;
}
}
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
static int state_table[32] = {
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xxxxxxx */
1,1,1,1,1,1,1,1, /* 10xxxxxx */
2,2,2,2, /* 110xxxxx */
3,3, /* 1110xxxx */
4, /* 11110xxx */
1 /* 11111xxx */
};
static int mask_bytes[32] = {
0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,0x7F,
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
0x1F,0x1F,0x1F,0x1F,
0x0F,0x0F,
0x07,
0x00
};
static int next[5] = {
0,
1,
0,
2,
3
};
if (*state == UTF8_ACCEPT) {
if (byte >= 0x80 && byte <= 0xC1) goto _reject;
*codep = byte & mask_bytes[byte >> 3];
*state = state_table[byte >> 3];
} else if (*state > 0) {
if (byte < 0x80 || byte >= 0xC0) goto _reject;
*codep = (byte & 0x3F) | (*codep << 6);
*state = next[*state];
}
return *state;
_reject:
*state = UTF8_REJECT;
return *state;
}
static int checkString(const char * chars, size_t length, size_t *codepointCount) {
uint32_t state = 0;
uint32_t codepoint = 0;
unsigned char * end = (unsigned char *)chars + length;
uint32_t maxCodepoint = 0;
for (unsigned char * c = (unsigned char *)chars; c < end; ++c) {
if (!decode(&state, &codepoint, *c)) {
if (codepoint > maxCodepoint) maxCodepoint = codepoint;
(*codepointCount)++;
} else if (state == UTF8_REJECT) {
krk_runtimeError(vm.exceptions.valueError, "Invalid UTF-8 sequence in string.");
fprintf(stderr, "Invalid sequence detected.\n");
*codepointCount = 0;
return KRK_STRING_ASCII;
}
}
if (maxCodepoint > 0xFFFF) {
return KRK_STRING_UCS4;
} else if (maxCodepoint > 0xFF) {
return KRK_STRING_UCS2;
} else if (maxCodepoint > 0x7F) {
return KRK_STRING_UCS1;
} else {
return KRK_STRING_ASCII;
}
}
#define GENREADY(size,type) \
static void _readyUCS ## size (KrkString * string) { \
uint32_t state = 0; \
uint32_t codepoint = 0; \
unsigned char * end = (unsigned char *)string->chars + string->length; \
string->codes = malloc(sizeof(type) * string->codesLength); \
type *outPtr = (type *)string->codes; \
for (unsigned char * c = (unsigned char *)string->chars; c < end; ++c) { \
if (!decode(&state, &codepoint, *c)) { \
*(outPtr++) = (type)codepoint; \
} else if (state == UTF8_REJECT) { \
state = 0; \
} \
} \
}
GENREADY(1,uint8_t)
GENREADY(2,uint16_t)
GENREADY(4,uint32_t)
#undef GENREADY
void * krk_unicodeString(KrkString * string) {
if (string->codes) return string->codes;
if (string->type == KRK_STRING_UCS1) _readyUCS1(string);
else if (string->type == KRK_STRING_UCS2) _readyUCS2(string);
else if (string->type == KRK_STRING_UCS4) _readyUCS4(string);
else krk_runtimeError(vm.exceptions.valueError, "Internal string error.");
return string->codes;
}
uint32_t krk_unicodeCodepoint(KrkString * string, size_t index) {
krk_unicodeString(string);
switch (string->type) {
case KRK_STRING_ASCII: return string->chars[index];
case KRK_STRING_UCS1: return ((uint8_t*)string->codes)[index];
case KRK_STRING_UCS2: return ((uint16_t*)string->codes)[index];
case KRK_STRING_UCS4: return ((uint32_t*)string->codes)[index];
}
krk_runtimeError(vm.exceptions.valueError, "Invalid string.");
return 0;
}
static KrkString * allocateString(char * chars, size_t length, uint32_t hash) {
KrkString * string = ALLOCATE_OBJECT(KrkString, OBJ_STRING);
string->length = length;
string->chars = chars;
string->hash = hash;
string->codesLength = 0;
string->type = checkString(chars,length,&string->codesLength);
string->codes = NULL;
if (string->type == KRK_STRING_ASCII) string->codes = string->chars;
krk_push(OBJECT_VAL(string));
krk_tableSet(&vm.strings, OBJECT_VAL(string), NONE_VAL());
krk_pop();
return string;
}
static uint32_t hashString(const char * key, size_t length) {
uint32_t hash = 0;
/* This is the so-called "sdbm" hash. It comes from a piece of
* public domain code from a clone of ndbm. */
for (size_t i = 0; i < length; ++i) {
hash = (int)key[i] + (hash << 6) + (hash << 16) - hash;
}
return hash;
}
KrkString * krk_takeString(char * chars, size_t length) {
uint32_t hash = hashString(chars, length);
KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
if (interned != NULL) {
FREE_ARRAY(char, chars, length + 1);
return interned;
}
return allocateString(chars, length, hash);
}
KrkString * krk_copyString(const char * chars, size_t length) {
uint32_t hash = hashString(chars, length);
KrkString * interned = krk_tableFindString(&vm.strings, chars, length, hash);
if (interned) return interned;
char * heapChars = ALLOCATE(char, length + 1);
memcpy(heapChars, chars, length);
heapChars[length] = '\0';
return allocateString(heapChars, length, hash);
}
KrkFunction * krk_newFunction(void) {
KrkFunction * function = ALLOCATE_OBJECT(KrkFunction, OBJ_FUNCTION);
function->requiredArgs = 0;
function->keywordArgs = 0;
function->upvalueCount = 0;
function->name = NULL;
function->docstring = NULL;
function->collectsArguments = 0;
function->collectsKeywords = 0;
function->localNameCount = 0;
function->localNames = NULL;
function->globalsContext = NULL;
krk_initValueArray(&function->requiredArgNames);
krk_initValueArray(&function->keywordArgNames);
krk_initChunk(&function->chunk);
return function;
}
KrkNative * krk_newNative(NativeFn function, const char * name, int type) {
KrkNative * native = ALLOCATE_OBJECT(KrkNative, OBJ_NATIVE);
native->function = function;
native->isMethod = type;
native->name = name;
return native;
}
KrkClosure * krk_newClosure(KrkFunction * function) {
KrkUpvalue ** upvalues = ALLOCATE(KrkUpvalue*, function->upvalueCount);
for (size_t i = 0; i < function->upvalueCount; ++i) {
upvalues[i] = NULL;
}
KrkClosure * closure = ALLOCATE_OBJECT(KrkClosure, OBJ_CLOSURE);
closure->function = function;
closure->upvalues = upvalues;
closure->upvalueCount = function->upvalueCount;
return closure;
}
KrkUpvalue * krk_newUpvalue(int slot) {
KrkUpvalue * upvalue = ALLOCATE_OBJECT(KrkUpvalue, OBJ_UPVALUE);
upvalue->location = slot;
upvalue->next = NULL;
upvalue->closed = NONE_VAL();
return upvalue;
}
KrkClass * krk_newClass(KrkString * name) {
KrkClass * _class = ALLOCATE_OBJECT(KrkClass, OBJ_CLASS);
_class->name = name;
_class->filename = NULL;
_class->docstring = NULL;
_class->base = NULL;
krk_initTable(&_class->methods);
krk_initTable(&_class->fields);
_class->_getter = NULL;
_class->_setter = NULL;
_class->_slicer = NULL;
_class->_reprer = NULL;
_class->_tostr = NULL;
_class->_call = NULL;
_class->_init = NULL;
_class->_eq = NULL;
_class->_len = NULL;
_class->_enter = NULL;
_class->_exit = NULL;
_class->_delitem = NULL;
_class->_iter = NULL;
_class->_getattr = NULL;
_class->_dir = NULL;
return _class;
}
KrkInstance * krk_newInstance(KrkClass * _class) {
KrkInstance * instance = ALLOCATE_OBJECT(KrkInstance, OBJ_INSTANCE);
instance->_class = _class;
krk_initTable(&instance->fields);
krk_push(OBJECT_VAL(instance));
krk_tableAddAll(&_class->fields, &instance->fields);
krk_pop();
instance->_internal = NULL; /* To be used by C-defined types to track internal objects. */
return instance;
}
KrkBoundMethod * krk_newBoundMethod(KrkValue receiver, KrkObj * method) {
KrkBoundMethod * bound = ALLOCATE_OBJECT(KrkBoundMethod, OBJ_BOUND_METHOD);
bound->receiver = receiver;
bound->method = method;
return bound;
}
KrkTuple * krk_newTuple(size_t length) {
KrkTuple * tuple = ALLOCATE_OBJECT(KrkTuple, OBJ_TUPLE);
tuple->inrepr = 0;
krk_initValueArray(&tuple->values);
krk_push(OBJECT_VAL(tuple));
tuple->values.capacity = length;
tuple->values.values = GROW_ARRAY(KrkValue,NULL,0,length);
krk_pop();
return tuple;
}
void krk_bytesUpdateHash(KrkBytes * bytes) {
bytes->hash = hashString((char*)bytes->bytes, bytes->length);
}
KrkBytes * krk_newBytes(size_t length, uint8_t * source) {
KrkBytes * bytes = ALLOCATE_OBJECT(KrkBytes, OBJ_BYTES);
bytes->length = length;
bytes->bytes = NULL;
krk_push(OBJECT_VAL(bytes));
bytes->bytes = ALLOCATE(uint8_t, length);
bytes->hash = -1;
if (source) {
memcpy(bytes->bytes, source, length);
krk_bytesUpdateHash(bytes);
}
krk_pop();
return bytes;
}