From abfaa50beed11e376733587bb0fee19d0403b41b Mon Sep 17 00:00:00 2001 From: "K. Lange" Date: Sun, 17 Jan 2021 22:01:58 +0900 Subject: [PATCH] Implement module packages --- README.md | 74 ++++++++++++ chunk.h | 2 + compiler.c | 47 +++++++- debug.c | 1 + modules/foo/__init__.krk | 1 + modules/foo/bar/__init__.krk | 1 + modules/foo/bar/baz.krk | 2 + test/__init__.krk | 0 test/__init__.krk.expect | 0 test/testPackageImports.krk | 58 ++++++++++ test/testPackageImports.krk.expect | 23 ++++ vm.c | 180 +++++++++++++++++++++++++---- 12 files changed, 363 insertions(+), 26 deletions(-) create mode 100644 modules/foo/__init__.krk create mode 100644 modules/foo/bar/__init__.krk create mode 100644 modules/foo/bar/baz.krk create mode 100644 test/__init__.krk create mode 100644 test/__init__.krk.expect create mode 100644 test/testPackageImports.krk create mode 100644 test/testPackageImports.krk.expect diff --git a/README.md b/README.md index 575010f..c2b0f42 100644 --- a/README.md +++ b/README.md @@ -719,6 +719,80 @@ print(imported) _**Note:** When individual names are imported from a module, they refer to the same object, but if new assignments are made to the name it will not affect the original module. If you need to replace values defined in a module, always be sure to refer to it by its full name._ +Modules can also come in the form of _packages_. Packages are modules that contain other modules. To make a package, create a directory in one of the module import paths with the name of your package and place a file named `__init__.krk` in that directory. This file will be run when the package is imported, but if you only want to use packages for namespacing it does not need to have any content. + +Say we have a directory tree as follows: + +``` +modules/ + foo/ + __init__.krk + bar/ + __init__.krk + baz.krk +``` + +With this directory tree, we can `import foo`, `import foo.bar`, or `import foo.bar.baz`. 
+ +When a module within a package is imported directly, as in `import foo.bar.baz`, its parent packages are imported in order and the interpreter ensures each has an attribute pointing to the next child. After the `import` statement, the top-level package will be bound in the current scope: + +```py +import foo.bar.baz +print(foo) +print(foo.bar) +print(foo.bar.baz) +# → +# +# +``` + +If we want to get at the module `baz` we can use `import ... as ...` to bind it to a name instead: + +```py +import foo.bar.baz as baz +print(baz) +try: + print(foo) # NameError +except: + print(repr(exception)) +# → +# NameError: Undefined variable 'foo'. +``` + +Note that only module names can be specified as the first argument to `import` or `from`, and that if a module within a package has never been imported it will not be available from its package. + +If we define something in `modules/foo/bar/baz.krk` we can access it either by its full name or through a `from` import: + +```py +# modules/foo/bar/baz.krk +let qux = "hello, world" +``` + +```py +import foo.bar.baz +print(foo.bar.baz.qux) +from foo.bar.baz import qux +print(qux) +# → hello, world +# hello, world +``` + +When using `from ... import`, the imported name can be a module, package, or regular member of the module before the `import`. Multiple names can be imported at once, but only one level can be imported: + +```py +# modules/foo/bar/baz.krk +let qux = "hello, world" +let quux = 42 +``` + +```py +# This is a syntax error. +#from foo.bar import baz.qux +from foo.bar.baz import qux, quux +print(qux,quux) +# → hello, world 42 +``` + ### Loops Kuroko supports C-style for loops, while loops, and Python-style iterator for loops. 
diff --git a/chunk.h b/chunk.h index ef1fa2e..60e857d 100644 --- a/chunk.h +++ b/chunk.h @@ -80,6 +80,7 @@ typedef enum { OP_DEL_GLOBAL, OP_DEL_PROPERTY, OP_INVOKE_DELETE, + OP_IMPORT_FROM, OP_CONSTANT_LONG = 128, OP_DEFINE_GLOBAL_LONG, @@ -103,6 +104,7 @@ typedef enum { OP_UNPACK_LONG, OP_DEL_GLOBAL_LONG, OP_DEL_PROPERTY_LONG, + OP_IMPORT_FROM_LONG, } KrkOpCode; typedef struct { diff --git a/compiler.c b/compiler.c index a55fcad..c907fa5 100644 --- a/compiler.c +++ b/compiler.c @@ -1519,28 +1519,63 @@ static void raiseStatement() { emitByte(OP_RAISE); } -static void importStatement() { + +static size_t importModule(KrkToken * startOfName) { consume(TOKEN_IDENTIFIER, "Expected module name"); - size_t ind = identifierConstant(&parser.previous); + *startOfName = parser.previous; + while (match(TOKEN_DOT)) { + if (startOfName->start + startOfName->literalWidth != parser.previous.start) { + error("Unexpected whitespace after module path element"); + return 0; + } + startOfName->literalWidth += parser.previous.literalWidth; + startOfName->length += parser.previous.length; + consume(TOKEN_IDENTIFIER, "Expected module path element after '.'"); + if (startOfName->start + startOfName->literalWidth != parser.previous.start) { + error("Unexpected whitespace after '.'"); + return 0; + } + startOfName->literalWidth += parser.previous.literalWidth; + startOfName->length += parser.previous.length; + } + size_t ind = identifierConstant(startOfName); EMIT_CONSTANT_OP(OP_IMPORT, ind); + return ind; +} + +static void importStatement() { + KrkToken firstName = parser.current; + KrkToken startOfName; + size_t ind = importModule(&startOfName); if (match(TOKEN_AS)) { consume(TOKEN_IDENTIFIER, "Expected identifier after `as`"); ind = identifierConstant(&parser.previous); + } else if (startOfName.length != firstName.length) { + /** + * We imported foo.bar.baz and 'baz' is now on the stack with no name. 
+ * But while doing that, we built a chain so that foo and foo.bar are + * valid modules that already exist in the module table. We want to + * have 'foo.bar.baz' be this new object, so remove 'baz', reimport + * 'foo' directly, and put 'foo' into the appropriate namespace. + */ + emitByte(OP_POP); + parser.previous = firstName; + ind = identifierConstant(&firstName); + EMIT_CONSTANT_OP(OP_IMPORT, ind); } declareVariable(); defineVariable(ind); } static void fromImportStatement() { - consume(TOKEN_IDENTIFIER, "Expected module name after 'from'"); - size_t ind = identifierConstant(&parser.previous); - EMIT_CONSTANT_OP(OP_IMPORT, ind); + KrkToken startOfName; + importModule(&startOfName); consume(TOKEN_IMPORT, "Expected 'import' after module name"); do { consume(TOKEN_IDENTIFIER, "Expected member name"); size_t member = identifierConstant(&parser.previous); emitBytes(OP_DUP, 0); /* Duplicate the package object so we can GET_PROPERTY on it? */ - EMIT_CONSTANT_OP(OP_GET_PROPERTY, member); + EMIT_CONSTANT_OP(OP_IMPORT_FROM, member); if (match(TOKEN_AS)) { consume(TOKEN_IDENTIFIER, "Expected identifier after `as`"); member = identifierConstant(&parser.previous); diff --git a/debug.c b/debug.c index 1ee18c6..e77f919 100644 --- a/debug.c +++ b/debug.c @@ -135,6 +135,7 @@ size_t krk_disassembleInstruction(FILE * f, KrkFunction * func, size_t offset) { CONSTANT(OP_METHOD, (void)0) CONSTANT(OP_CLOSURE, CLOSURE_MORE) CONSTANT(OP_IMPORT, (void)0) + CONSTANT(OP_IMPORT_FROM, (void)0) CONSTANT(OP_GET_SUPER, (void)0) OPERAND(OP_KWARGS, (void)0) OPERAND(OP_SET_LOCAL, LOCAL_MORE) diff --git a/modules/foo/__init__.krk b/modules/foo/__init__.krk new file mode 100644 index 0000000..a332764 --- /dev/null +++ b/modules/foo/__init__.krk @@ -0,0 +1 @@ +print("Imported foo.__init__ as", __name__) diff --git a/modules/foo/bar/__init__.krk b/modules/foo/bar/__init__.krk new file mode 100644 index 0000000..7209164 --- /dev/null +++ b/modules/foo/bar/__init__.krk @@ -0,0 +1 @@ +print("Imported 
bar.__init__ as", __name__) diff --git a/modules/foo/bar/baz.krk b/modules/foo/bar/baz.krk new file mode 100644 index 0000000..4fac823 --- /dev/null +++ b/modules/foo/bar/baz.krk @@ -0,0 +1,2 @@ +print("imported baz.krk as", __name__) +let qux = "This is it!" diff --git a/test/__init__.krk b/test/__init__.krk new file mode 100644 index 0000000..e69de29 diff --git a/test/__init__.krk.expect b/test/__init__.krk.expect new file mode 100644 index 0000000..e69de29 diff --git a/test/testPackageImports.krk b/test/testPackageImports.krk new file mode 100644 index 0000000..54f35f4 --- /dev/null +++ b/test/testPackageImports.krk @@ -0,0 +1,58 @@ +def testTop(): + import foo + print(foo) + try: + print(foo.bar, "Fail") + except: + print(repr(exception)) # AttributeError + +def testCaching(): + from foo.bar import baz + print(baz) + import foo + print(foo) + print(foo.bar) + print(foo.bar.baz) + print(foo.bar.baz.qux) + +def testDirect(): + import foo.bar.baz + print(foo) + print(foo.bar) + print(foo.bar.baz) + print(foo.bar.baz.qux) + +def testFromImport(): + from foo.bar import baz + print(baz) + print(baz.qux) + try: + print(foo, "Fail") + except: + print(repr(exception)) + +def testRenames(): + import foo.bar.baz as blah + print(blah) + print(blah.qux) + try: + print(foo, "Fail") + except: + print(repr(exception)) + from foo.bar.baz import qux as thing + print(thing) + try: + print(qux, "Fail") + except: + print(repr(exception)) + try: + print(foo.bar, "Fail") + except: + print(repr(exception)) + +if __name__ == '__main__': + testTop() + testCaching() + testDirect() + testFromImport() + testRenames() diff --git a/test/testPackageImports.krk.expect b/test/testPackageImports.krk.expect new file mode 100644 index 0000000..7afcf60 --- /dev/null +++ b/test/testPackageImports.krk.expect @@ -0,0 +1,23 @@ +Imported foo.__init__ as foo + +AttributeError: 'module' object has no attribute 'bar' +Imported bar.__init__ as foo.bar +imported baz.krk as foo.bar.baz + + + + +This is it! 
+ + + +This is it! + +This is it! +NameError: Undefined variable 'foo'. + +This is it! +NameError: Undefined variable 'foo'. +This is it! +NameError: Undefined variable 'qux'. +NameError: Undefined variable 'foo'. diff --git a/vm.c b/vm.c index 4922196..e4fe349 100644 --- a/vm.c +++ b/vm.c @@ -3634,11 +3634,11 @@ static int handleException() { * a later search path has a krk source and an earlier search path has a shared * object module, the later search path will still win. */ -int krk_loadModule(KrkString * name, KrkValue * moduleOut, KrkString * runAs) { +int krk_loadModule(KrkString * path, KrkValue * moduleOut, KrkString * runAs) { KrkValue modulePaths, modulePathsInternal; /* See if the module is already loaded */ - if (krk_tableGet(&vm.modules, OBJECT_VAL(name), moduleOut)) { + if (krk_tableGet(&vm.modules, OBJECT_VAL(runAs), moduleOut)) { krk_push(*moduleOut); return 1; } @@ -3676,22 +3676,36 @@ int krk_loadModule(KrkString * name, KrkValue * moduleOut, KrkString * runAs) { struct stat statbuf; - /* First search for {name}.krk in the module search paths */ + /* First search for {path}.krk in the module search paths */ for (int i = 0; i < moduleCount; ++i, krk_pop()) { - krk_push(AS_FUNCTION(modulePathsInternal)->chunk.constants.values[i]); + krk_push(AS_LIST(modulePathsInternal)->values[i]); if (!IS_STRING(krk_peek(0))) { *moduleOut = NONE_VAL(); krk_runtimeError(vm.exceptions.typeError, "Module search paths must be strings; check the search path at index %d", i); return 0; } - krk_push(OBJECT_VAL(name)); + krk_push(OBJECT_VAL(path)); addObjects(); /* Concatenate path... 
*/ krk_push(OBJECT_VAL(S(".krk"))); addObjects(); /* and file extension */ + int isPackage = 0; char * fileName = AS_CSTRING(krk_peek(0)); - if (stat(fileName,&statbuf) < 0) continue; + if (stat(fileName,&statbuf) < 0) { + krk_pop(); + /* try /__init__.krk */ + krk_push(AS_LIST(modulePathsInternal)->values[i]); + krk_push(OBJECT_VAL(path)); + addObjects(); + krk_push(OBJECT_VAL(S("/__init__.krk"))); + addObjects(); + fileName = AS_CSTRING(krk_peek(0)); + if (stat(fileName,&statbuf) < 0) { + continue; + } + isPackage = 1; + } /* Compile and run the module in a new context and exit the VM when it * returns to the current call frame; modules should return objects. */ @@ -3702,23 +3716,27 @@ int krk_loadModule(KrkString * name, KrkValue * moduleOut, KrkString * runAs) { if (!IS_OBJECT(*moduleOut)) { if (!(vm.flags & KRK_HAS_EXCEPTION)) { krk_runtimeError(vm.exceptions.importError, - "Failed to load module '%s' from '%s'", name->chars, fileName); + "Failed to load module '%s' from '%s'", runAs->chars, fileName); } return 0; } krk_pop(); /* concatenated filename on stack */ krk_push(*moduleOut); - krk_tableSet(&vm.modules, OBJECT_VAL(name), *moduleOut); + krk_tableSet(&vm.modules, OBJECT_VAL(runAs), *moduleOut); + /* Was this a package? */ + if (isPackage) { + krk_attachNamedValue(&AS_INSTANCE(*moduleOut)->fields,"__ispackage__",BOOLEAN_VAL(1)); + } return 1; } #ifndef STATIC_ONLY - /* If we didn't find {name}.krk, try {name}.so in the same order */ + /* If we didn't find {path}.krk, try {path}.so in the same order */ for (int i = 0; i < moduleCount; ++i, krk_pop()) { /* Assume things haven't changed and all of these are strings. 
*/ krk_push(AS_FUNCTION(modulePathsInternal)->chunk.constants.values[i]); - krk_push(OBJECT_VAL(name)); + krk_push(OBJECT_VAL(path)); addObjects(); /* this should just be basic concatenation */ krk_push(OBJECT_VAL(S(".so"))); addObjects(); @@ -3730,12 +3748,17 @@ int krk_loadModule(KrkString * name, KrkValue * moduleOut, KrkString * runAs) { if (!dlRef) { *moduleOut = NONE_VAL(); krk_runtimeError(vm.exceptions.importError, - "Failed to load native module '%s' from shared object '%s'", name->chars, fileName); + "Failed to load native module '%s' from shared object '%s'", runAs->chars, fileName); return 0; } + const char * start = path->chars; + for (const char * c = start; *c; c++) { + if (*c == '.') start = c + 1; + } + krk_push(OBJECT_VAL(S("krk_module_onload_"))); - krk_push(OBJECT_VAL(name)); + krk_push(OBJECT_VAL(krk_copyString(start,strlen(start)))); addObjects(); char * handlerName = AS_CSTRING(krk_peek(0)); @@ -3757,28 +3780,125 @@ int krk_loadModule(KrkString * name, KrkValue * moduleOut, KrkString * runAs) { *moduleOut = moduleOnLoad(runAs); if (!IS_INSTANCE(*moduleOut)) { krk_runtimeError(vm.exceptions.importError, - "Failed to load module '%s' from '%s'", name->chars, fileName); + "Failed to load module '%s' from '%s'", runAs->chars, fileName); return 0; } krk_push(*moduleOut); krk_swap(1); - krk_attachNamedObject(&AS_INSTANCE(*moduleOut)->fields, "__name__", (KrkObj*)name); + krk_attachNamedObject(&AS_INSTANCE(*moduleOut)->fields, "__name__", (KrkObj*)runAs); krk_attachNamedValue(&AS_INSTANCE(*moduleOut)->fields, "__file__", krk_peek(0)); krk_pop(); /* filename */ - krk_tableSet(&vm.modules, OBJECT_VAL(name), *moduleOut); + krk_tableSet(&vm.modules, OBJECT_VAL(runAs), *moduleOut); return 1; } #endif /* If we still haven't found anything, fail. 
*/ *moduleOut = NONE_VAL(); - krk_runtimeError(vm.exceptions.importError, "No module named '%s'", name->chars); + krk_runtimeError(vm.exceptions.importError, "No module named '%s'", runAs->chars); return 0; } +int krk_doRecursiveModuleLoad(KrkString * name) { + /* See if 'name' is clear to directly import */ + int isClear = 1; + for (size_t i = 0; i < name->length; ++i) { + if (name->chars[i] == '.') { + isClear = 0; + break; + } + } + + if (isClear) { + KrkValue base; + return krk_loadModule(name,&base,name); + } + + /** + * To import foo.bar.baz + * - import foo as foo + * - import foo/bar as foo.bar + * - import foo/bar/baz as foo.bar.baz + */ + + /* Let's split up name */ + krk_push(NONE_VAL()); // -1: last + int argBase = vm.stackTop - vm.stack; + krk_push(NONE_VAL()); // 0: Name of current node being processed. + krk_push(OBJECT_VAL(S(""))); // 1: slash/separated/path + krk_push(OBJECT_VAL(S(""))); // 2: dot.separated.path + krk_push(OBJECT_VAL(name)); // 3: remaining path to process + krk_push(OBJECT_VAL(S("."))); // 4: string "." 
to search for + do { + KrkValue listOut = _string_split(3,(KrkValue[]){vm.stack[argBase+3], vm.stack[argBase+4], INTEGER_VAL(1)}, 0); + if (!IS_INSTANCE(listOut)) return 0; + KrkValue _list_internal = OBJECT_VAL(AS_INSTANCE(listOut)->_internal); + + /* Set node */ + vm.stack[argBase+0] = AS_LIST(_list_internal)->values[0]; + + /* Set remainder */ + if (AS_LIST(_list_internal)->count > 1) { + vm.stack[argBase+3] = AS_LIST(_list_internal)->values[1]; + } else { + vm.stack[argBase+3] = NONE_VAL(); + } + + /* First is /-path */ + krk_push(vm.stack[argBase+1]); + krk_push(vm.stack[argBase+0]); + addObjects(); + vm.stack[argBase+1] = krk_pop(); + /* Second is .-path */ + krk_push(vm.stack[argBase+2]); + krk_push(vm.stack[argBase+0]); + addObjects(); + vm.stack[argBase+2] = krk_pop(); + + if (IS_NONE(vm.stack[argBase+3])) { + krk_pop(); /* dot */ + krk_pop(); /* remainder */ + KrkValue current; + if (!krk_loadModule(AS_STRING(vm.stack[argBase+1]), &current, AS_STRING(vm.stack[argBase+2]))) return 0; + krk_pop(); /* dot-separated */ + krk_pop(); /* slash-separated */ + krk_push(current); + /* last must be something if we got here, because single-level import happens elsewhere */ + krk_tableSet(&AS_INSTANCE(vm.stack[argBase-1])->fields, vm.stack[argBase+0], krk_peek(0)); + vm.stackTop = vm.stack + argBase; + vm.stackTop[-1] = current; + return 1; + } else { + KrkValue current; + if (!krk_loadModule(AS_STRING(vm.stack[argBase+1]), &current, AS_STRING(vm.stack[argBase+2]))) return 0; + krk_push(current); + if (!IS_NONE(vm.stack[argBase-1])) { + krk_tableSet(&AS_INSTANCE(vm.stack[argBase-1])->fields, vm.stack[argBase+0], krk_peek(0)); + } + /* Is this a package? 
*/ + KrkValue tmp; + if (!krk_tableGet(&AS_INSTANCE(current)->fields, OBJECT_VAL(S("__ispackage__")), &tmp) || !IS_BOOLEAN(tmp) || AS_BOOLEAN(tmp) != 1) { + krk_runtimeError(vm.exceptions.importError, "'%s' is not a package", AS_CSTRING(vm.stack[argBase+2])); + return 0; + } + vm.stack[argBase-1] = krk_pop(); + /* Now concatenate forward slash... */ + krk_push(vm.stack[argBase+1]); /* Slash path */ + krk_push(OBJECT_VAL(S("/"))); + addObjects(); + vm.stack[argBase+1] = krk_pop(); + /* And now for the dot... */ + krk_push(vm.stack[argBase+2]); + krk_push(vm.stack[argBase+4]); + addObjects(); + vm.stack[argBase+2] = krk_pop(); + } + } while (1); +} + /** * Try to resolve and push [stack top].name. * If [stack top] is an instance, scan fields first. @@ -4040,8 +4160,7 @@ static KrkValue run() { case OP_IMPORT_LONG: case OP_IMPORT: { KrkString * name = READ_STRING(operandWidth); - KrkValue module; - if (!krk_loadModule(name, &module, name)) { + if (!krk_doRecursiveModuleLoad(name)) { goto _finishException; } break; @@ -4158,6 +4277,28 @@ static KrkValue run() { krk_tableAddAll(&vm.objectClass->fields, &_class->fields); break; } + case OP_IMPORT_FROM_LONG: + case OP_IMPORT_FROM: { + KrkString * name = READ_STRING(operandWidth); + if (unlikely(!valueGetProperty(name))) { + /* Try to import... 
*/ + KrkValue moduleName; + if (!krk_tableGet(&AS_INSTANCE(krk_peek(0))->fields, vm.specialMethodNames[METHOD_NAME], &moduleName)) { + krk_runtimeError(vm.exceptions.attributeError, "'%s' object has no attribute '%s'", krk_typeName(krk_peek(0)), name->chars); + goto _finishException; + } + krk_push(moduleName); + krk_push(OBJECT_VAL(S("."))); + addObjects(); + krk_push(OBJECT_VAL(name)); + addObjects(); + if (!krk_doRecursiveModuleLoad(AS_STRING(krk_peek(0)))) { + goto _finishException; + } + vm.stackTop[-3] = vm.stackTop[-1]; + vm.stackTop -= 2; + } + } break; case OP_GET_PROPERTY_LONG: case OP_GET_PROPERTY: { KrkString * name = READ_STRING(operandWidth); @@ -4399,9 +4540,8 @@ KrkValue krk_interpret(const char * src, int newScope, char * fromName, char * f return NONE_VAL(); } - krk_attachNamedObject(&vm.module->fields, "__file__", (KrkObj*)function->chunk.filename); - krk_push(OBJECT_VAL(function)); + krk_attachNamedObject(&vm.module->fields, "__file__", (KrkObj*)function->chunk.filename); function->name = krk_copyString(fromName, strlen(fromName));