Support comprehensions for dicts, tuples.

This commit is contained in:
K. Lange 2021-01-19 21:06:52 +09:00
parent 851d3df8cd
commit 895eb367ee
5 changed files with 224 additions and 177 deletions

View File

@ -18,7 +18,7 @@ On top of this, Kuroko adds a number of features inspired by Python, such as:
- Indentation-based block syntax.
- Collection types: `list`, `dict`, `tuple`, with compiler literal syntax (`[]`,`{}`,`(,)`).
- Iterable types, with `for ... in ...` syntax.
- List comprehensions (`[foo(x) for x in [1,2,3,4]]` and similar expressions).
- List, dict, and tuple comprehensions (`[foo(x) for x in [1,2,3,4]]` and similar expressions).
- Pseudo-classes for basic values (eg. strings are pseudo-instances of a `str` class providing methods like `.format()`)
- Exception handling, with `try`/`except`/`raise`.
- Modules, both for native C code and managed Kuroko code.
@ -587,7 +587,7 @@ print(s)
# → {1, 2, 3, 4}
```
Lists can also be generated dynamically through _comprehensions_, just as in Python:
Lists, dicts, and tuples can also be generated dynamically through _comprehensions_, just as in Python:
```py
let fives = [x * 5 for x in [1,2,3,4,5]]
@ -595,7 +595,12 @@ print(fives)
# → [5, 10, 15, 20, 25]
```
_**Note:** Dictionary, tuple, and set comprehensions are not currently available, but are planned._
```py
let d = {'a': 1, 'b': 2, 'c': 3}
let dInverted = {v: k for k, v in d.items()}
print(d,dInverted)
# → {'a': 1, 'b': 2, 'c': 3} {1: 'a', 2: 'b', 3: 'c'}
```
### Exceptions

View File

@ -1649,27 +1649,6 @@ _anotherSimpleStatement:
}
}
/*
 * Prefix parse rule for '('.
 *
 * An immediately following ')' emits OP_TUPLE with an operand of 0.
 * Otherwise one expression is parsed; if a comma follows it, further
 * comma-separated expressions are parsed (a trailing comma is allowed)
 * and OP_TUPLE is emitted with the number of expressions parsed. With
 * no comma the parenthesized expression is left as-is.
 *
 * `canAssign` is part of the parse-rule signature and is not used here.
 */
static void grouping(int canAssign) {
    startEatingWhitespace();
    if (!check(TOKEN_RIGHT_PAREN)) {
        expression();
        if (match(TOKEN_COMMA)) {
            /* The expression parsed before the first comma is element one. */
            size_t elements = 1;
            while (!check(TOKEN_RIGHT_PAREN)) {
                expression();
                elements++;
                if (!match(TOKEN_COMMA)) break;
            }
            EMIT_CONSTANT_OP(OP_TUPLE, elements);
        }
    } else {
        emitBytes(OP_TUPLE,0);
    }
    stopEatingWhitespace();
    consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
}
static void unary(int canAssign) {
KrkTokenType operatorType = parser.previous.type;
@ -1895,31 +1874,7 @@ static void super_(int canAssign) {
EMIT_CONSTANT_OP(OP_GET_SUPER, ind);
}
static void list(int canAssign) {
size_t chunkBefore = currentChunk()->count;
startEatingWhitespace();
KrkToken listOf = syntheticToken("listOf");
size_t ind = identifierConstant(&listOf);
EMIT_CONSTANT_OP(OP_GET_GLOBAL, ind);
if (!check(TOKEN_RIGHT_SQUARE)) {
KrkScanner scannerBefore = krk_tellScanner();
Parser parserBefore = parser;
expression();
/* This is a bit complicated and the Pratt parser does not handle it
* well; if we read an expression and then saw a `for`, we need to back
* up and start over, as we'll need to define a variable _after_ it
* gets used in this expression; so we record the parser state before
* reading the first expression of a list constant. If it _is_ a real
* list constant, we'll see a comma next and we can begin the normal
* loop of counting arguments. */
if (match(TOKEN_FOR)) {
/* Roll back the earlier compiler */
currentChunk()->count = chunkBefore;
static void comprehension(KrkScanner scannerBefore, Parser parserBefore, const char buildFunc[], void (*inner)(ssize_t loopCounter)) {
/* Compile list comprehension as a function */
Compiler subcompiler;
initCompiler(&subcompiler, TYPE_FUNCTION);
@ -1930,7 +1885,7 @@ static void list(int canAssign) {
/* for i=0, */
emitConstant(INTEGER_VAL(0));
size_t indLoopCounter = current->localCount;
addLocal(syntheticToken("__loop_count"));
addLocal(syntheticToken(""));
defineVariable(indLoopCounter);
/* x in... */
@ -1943,7 +1898,7 @@ static void list(int canAssign) {
varCount++;
} while (match(TOKEN_COMMA));
consume(TOKEN_IN, "Only iterator loops (for ... in ...) are allowed in list comprehensions.");
consume(TOKEN_IN, "Only iterator loops (for ... in ...) are allowed in comprehensions.");
beginScope();
parsePrecedence(PREC_OR); /* Otherwise we can get trapped on a ternary */
@ -1951,7 +1906,7 @@ static void list(int canAssign) {
/* iterable... */
size_t indLoopIter = current->localCount;
addLocal(syntheticToken("__loop_iter"));
addLocal(syntheticToken(""));
defineVariable(indLoopIter);
/* Now try to call .__iter__ on the result to produce our iterator */
@ -2010,7 +1965,7 @@ static void list(int canAssign) {
parser = parserBefore;
beginScope();
expression();
inner(indLoopCounter);
endScope();
/* Then we can put the parser back to where it was at the end of
@ -2030,14 +1985,11 @@ static void list(int canAssign) {
* and we're done receiving objects, so mark this instruction
* offset as the exit target for the OP_JUMP_IF_FALSE above */
patchJump(exitJump);
/* Parse the ] that indicates the end of the list comprehension */
stopEatingWhitespace();
consume(TOKEN_RIGHT_SQUARE,"Expected ] at end of list expression.");
/* Pop the last loop expression result which was already stored */
emitByte(OP_POP);
/* Pull in listOf from the global namespace */
KrkToken listOf = syntheticToken("listOf");
size_t indList = identifierConstant(&listOf);
KrkToken collectionBuilder = syntheticToken(buildFunc);
size_t indList = identifierConstant(&collectionBuilder);
EMIT_CONSTANT_OP(OP_GET_GLOBAL, indList);
/* And move it into where we were storing the loop iterator */
EMIT_CONSTANT_OP(OP_SET_LOCAL, indLoopIter);
@ -2059,41 +2011,125 @@ static void list(int canAssign) {
/* And finally we can call the subfunction and get the result. */
emitBytes(OP_CALL, 0);
}
/*
 * Body callback handed to comprehension() by the list and tuple rules:
 * the per-iteration body is a single expression. The loop-counter local
 * index is accepted to match the callback signature and is not used.
 */
static void singleInner(ssize_t indLoopCounter) {
expression();
}
/*
 * Prefix parse rule for '('.
 *
 * Cases, in the order they are tested:
 *   - `()`              emits OP_TUPLE with an operand of 0;
 *   - `(expr for ...)`  discards the bytecode emitted for `expr` and
 *                       hands off to comprehension() with "tupleOf";
 *   - `(expr, ...)`     emits OP_TUPLE with the number of expressions
 *                       parsed (a trailing comma is allowed);
 *   - `(expr)`          leaves the parsed expression as-is.
 *
 * `canAssign` is part of the parse-rule signature and is not used here.
 */
static void grouping(int canAssign) {
    startEatingWhitespace();
    if (!check(TOKEN_RIGHT_PAREN)) {
        /* Snapshot chunk, scanner and parser before the first expression
         * so a comprehension can rewind and re-parse it later. */
        size_t rewindOffset = currentChunk()->count;
        KrkScanner savedScanner = krk_tellScanner();
        Parser savedParser = parser;
        expression();
        if (match(TOKEN_FOR)) {
            currentChunk()->count = rewindOffset;
            comprehension(savedScanner, savedParser, "tupleOf", singleInner);
        } else if (match(TOKEN_COMMA)) {
            size_t elements = 1;
            if (!check(TOKEN_RIGHT_PAREN)) {
                for (;;) {
                    expression();
                    elements++;
                    if (!match(TOKEN_COMMA)) break;
                    if (check(TOKEN_RIGHT_PAREN)) break;
                }
            }
            EMIT_CONSTANT_OP(OP_TUPLE, elements);
        }
    } else {
        emitBytes(OP_TUPLE,0);
    }
    stopEatingWhitespace();
    consume(TOKEN_RIGHT_PAREN, "Expect ')' after expression.");
}
/*
 * Prefix parse rule for '[': compiles a list literal or a list
 * comprehension.
 *
 * A load of the global `listOf` is emitted first. Then:
 *   - `[]`              emits OP_CALL with 0 arguments;
 *   - `[expr for ...]`  rewinds the chunk and hands off to
 *                       comprehension() with "listOf";
 *   - `[a, b, ...]`     emits OP_CALL with the number of elements
 *                       parsed (a trailing comma is allowed).
 *
 * The closing ']' is consumed exactly once, at the end, for all three
 * forms. Branches must not consume it themselves, or the shared
 * consume() below would demand a second ']'.
 *
 * `canAssign` is part of the parse-rule signature and is not used here.
 */
static void list(int canAssign) {
    /* Captured before the `listOf` load is emitted, so rolling back to
     * this offset discards that load as well. */
    size_t chunkBefore = currentChunk()->count;
    startEatingWhitespace();
    KrkToken listOf = syntheticToken("listOf");
    size_t ind = identifierConstant(&listOf);
    EMIT_CONSTANT_OP(OP_GET_GLOBAL, ind);
    if (!check(TOKEN_RIGHT_SQUARE)) {
        KrkScanner scannerBefore = krk_tellScanner();
        Parser parserBefore = parser;
        expression();
        /* This is a bit complicated and the Pratt parser does not handle it
         * well; if we read an expression and then saw a `for`, we need to back
         * up and start over, as we'll need to define a variable _after_ it
         * gets used in this expression; so we record the parser state before
         * reading the first expression of a list constant. If it _is_ a real
         * list constant, we'll see a comma next and we can begin the normal
         * loop of counting arguments. */
        if (match(TOKEN_FOR)) {
            /* Roll back the earlier compiler */
            currentChunk()->count = chunkBefore;
            comprehension(scannerBefore, parserBefore, "listOf", singleInner);
        } else {
            size_t argCount = 1;
            while (match(TOKEN_COMMA) && !check(TOKEN_RIGHT_SQUARE)) {
                expression();
                argCount++;
            }
            EMIT_CONSTANT_OP(OP_CALL, argCount);
        }
    } else {
        /* Empty list expression */
        emitBytes(OP_CALL, 0);
    }
    /* Single shared close for every form above. */
    stopEatingWhitespace();
    consume(TOKEN_RIGHT_SQUARE,"Expected ] at end of list expression.");
}
/*
 * Body callback handed to comprehension() by the dict rule: parses one
 * `key: value` pair and then emits OP_INC on the local slot passed in
 * as the comprehension's loop counter.
 * NOTE(review): presumably the extra increment is there because each
 * iteration yields two values (key and value) rather than one - confirm
 * against how comprehension() advances the counter.
 */
static void dictInner(ssize_t indLoopCounter) {
expression();
consume(TOKEN_COLON, "Expect colon after dict key.");
expression();
EMIT_CONSTANT_OP(OP_INC, indLoopCounter);
}
/*
 * Prefix parse rule for '{': compiles a dict literal or a dict
 * comprehension.
 *
 * A load of the global `dictOf` is emitted first. Then:
 *   - `{}`                emits OP_CALL with 0 arguments;
 *   - `{k: v for ...}`    rewinds the chunk and hands off to
 *                         comprehension() with "dictOf" and dictInner;
 *   - `{k: v, ...}`       emits OP_CALL with two arguments per pair
 *                         (a trailing comma is allowed).
 *
 * Exactly one OP_CALL is emitted per literal, and the closing '}' is
 * consumed once at the end for all three forms.
 *
 * `canAssign` is part of the parse-rule signature and is not used here.
 */
static void dict(int canAssign) {
    /* Captured before the `dictOf` load is emitted, so rolling back to
     * this offset discards that load as well. */
    size_t chunkBefore = currentChunk()->count;
    startEatingWhitespace();
    KrkToken dictOf = syntheticToken("dictOf");
    size_t ind = identifierConstant(&dictOf);
    EMIT_CONSTANT_OP(OP_GET_GLOBAL, ind);
    if (!check(TOKEN_RIGHT_BRACE)) {
        /* Remember where the first pair starts: if a `for` follows it,
         * the pair has to be re-parsed after the loop variables exist. */
        KrkScanner scannerBefore = krk_tellScanner();
        Parser parserBefore = parser;
        expression();
        consume(TOKEN_COLON, "Expect colon after dict key.");
        expression();
        if (match(TOKEN_FOR)) {
            /* Roll back the earlier compiler */
            currentChunk()->count = chunkBefore;
            comprehension(scannerBefore, parserBefore, "dictOf", dictInner);
        } else {
            /* The first key and value are already on the stack. */
            size_t argCount = 2;
            while (match(TOKEN_COMMA) && !check(TOKEN_RIGHT_BRACE)) {
                expression();
                consume(TOKEN_COLON, "Expect colon after dict key.");
                expression();
                argCount += 2;
            }
            EMIT_CONSTANT_OP(OP_CALL, argCount);
        }
    } else {
        /* Empty dict expression */
        emitBytes(OP_CALL, 0);
    }
    stopEatingWhitespace();
    consume(TOKEN_RIGHT_BRACE,"Expected } at end of dict expression.");
}
/* Builds one designated-initializer table entry indexed by `token`: the
 * first field is the token's name as a string literal (via `#`),
 * followed by `a`, `b` and `c` unchanged. */
#define RULE(token, a, b, c) [token] = {# token, a, b, c}

View File

@ -0,0 +1,4 @@
let d = {'a': 1, 'b': 2, 'c': 3}
let dInverted = {v: k for k, v in d.items()}
print(d,dInverted)

View File

@ -0,0 +1 @@
{'a': 1, 'b': 2, 'c': 3} {1: 'a', 2: 'b', 3: 'c'}

5
vm.c
View File

@ -3374,8 +3374,9 @@ void krk_initVM(int flags) {
krk_finalizeClass(vm.baseClasses.bytesClass);
/* Build global builtin functions. */
BUILTIN_FUNCTION("listOf", krk_list_of); /* Equivalent to list() */
BUILTIN_FUNCTION("dictOf", krk_dict_of); /* Equivalent to dict() */
BUILTIN_FUNCTION("listOf", krk_list_of);
BUILTIN_FUNCTION("dictOf", krk_dict_of);
BUILTIN_FUNCTION("tupleOf", _tuple_of);
BUILTIN_FUNCTION("isinstance", krk_isinstance);
BUILTIN_FUNCTION("globals", krk_globals);
BUILTIN_FUNCTION("dir", _dir);