json: Nicer recovery from lexical errors
When the lexer chokes on an input character, it consumes the
character, emits a JSON error token, and enters its start state. This
can lead to suboptimal error recovery. For instance, input
0123 ,
produces the tokens
JSON_ERROR 01
JSON_INTEGER 23
JSON_COMMA ,
Make the lexer skip characters after a lexical error until a
structural character ('[', ']', '{', '}', ':', ','), an ASCII control
character, or '\xFE', or '\xFF'.
Note that we must not skip ASCII control characters, '\xFE', '\xFF',
because those are documented to force the JSON parser into known-good
state, by docs/interop/qmp-spec.txt.
The lexer now produces
JSON_ERROR 01
JSON_COMMA ,
Update qmp-test for the nicer error recovery: QMP now reports just one
error for input %p instead of two. Also drop the newline after %p; it
was needed to tease out the second error.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-Id: <20180831075841.13363-5-armbru@redhat.com>
[Conflict with commit ebb4d82d88
resolved]
This commit is contained in:
parent
c0ee3afa7f
commit
0f07a5d5f1
@ -101,6 +101,7 @@
|
||||
|
||||
enum json_lexer_state {
|
||||
IN_ERROR = 0, /* must really be 0, see json_lexer[] */
|
||||
IN_RECOVERY,
|
||||
IN_DQ_STRING_ESCAPE,
|
||||
IN_DQ_STRING,
|
||||
IN_SQ_STRING_ESCAPE,
|
||||
@ -130,6 +131,28 @@ QEMU_BUILD_BUG_ON(IN_START_INTERP != IN_START + 1);
|
||||
static const uint8_t json_lexer[][256] = {
|
||||
/* Relies on default initialization to IN_ERROR! */
|
||||
|
||||
/* error recovery */
|
||||
[IN_RECOVERY] = {
|
||||
/*
|
||||
* Skip characters until a structural character, an ASCII
|
||||
* control character other than '\t', or impossible UTF-8
|
||||
* bytes '\xFE', '\xFF'. Structural characters and line
|
||||
* endings are promising resynchronization points. Clients
|
||||
* may use the others to force the JSON parser into known-good
|
||||
* state; see docs/interop/qmp-spec.txt.
|
||||
*/
|
||||
[0 ... 0x1F] = IN_START | LOOKAHEAD,
|
||||
[0x20 ... 0xFD] = IN_RECOVERY,
|
||||
[0xFE ... 0xFF] = IN_START | LOOKAHEAD,
|
||||
['\t'] = IN_RECOVERY,
|
||||
['['] = IN_START | LOOKAHEAD,
|
||||
[']'] = IN_START | LOOKAHEAD,
|
||||
['{'] = IN_START | LOOKAHEAD,
|
||||
['}'] = IN_START | LOOKAHEAD,
|
||||
[':'] = IN_START | LOOKAHEAD,
|
||||
[','] = IN_START | LOOKAHEAD,
|
||||
},
|
||||
|
||||
/* double quote string */
|
||||
[IN_DQ_STRING_ESCAPE] = {
|
||||
[0x20 ... 0xFD] = IN_DQ_STRING,
|
||||
@ -301,26 +324,18 @@ static void json_lexer_feed_char(JSONLexer *lexer, char ch, bool flush)
|
||||
/* fall through */
|
||||
case JSON_SKIP:
|
||||
g_string_truncate(lexer->token, 0);
|
||||
/* fall through */
|
||||
case IN_START:
|
||||
new_state = lexer->start_state;
|
||||
break;
|
||||
case IN_ERROR:
|
||||
/* XXX: To avoid having previous bad input leaving the parser in an
|
||||
* unresponsive state where we consume unpredictable amounts of
|
||||
* subsequent "good" input, percolate this error state up to the
|
||||
* parser by emitting a JSON_ERROR token, then reset lexer state.
|
||||
*
|
||||
* Also note that this handling is required for reliable channel
|
||||
* negotiation between QMP and the guest agent, since chr(0xFF)
|
||||
* is placed at the beginning of certain events to ensure proper
|
||||
* delivery when the channel is in an unknown state. chr(0xFF) is
|
||||
* never a valid ASCII/UTF-8 sequence, so this should reliably
|
||||
* induce an error/flush state.
|
||||
*/
|
||||
json_message_process_token(lexer, lexer->token, JSON_ERROR,
|
||||
lexer->x, lexer->y);
|
||||
new_state = IN_RECOVERY;
|
||||
/* fall through */
|
||||
case IN_RECOVERY:
|
||||
g_string_truncate(lexer->token, 0);
|
||||
lexer->state = lexer->start_state;
|
||||
return;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -76,10 +76,7 @@ static void test_malformed(QTestState *qts)
|
||||
assert_recovered(qts);
|
||||
|
||||
/* lexical error: interpolation */
|
||||
qtest_qmp_send_raw(qts, "%%p\n");
|
||||
/* two errors, one for "%", one for "p" */
|
||||
resp = qtest_qmp_receive(qts);
|
||||
qmp_assert_error_class(resp, "GenericError");
|
||||
qtest_qmp_send_raw(qts, "%%p");
|
||||
resp = qtest_qmp_receive(qts);
|
||||
qmp_assert_error_class(resp, "GenericError");
|
||||
assert_recovered(qts);
|
||||
|
Loading…
Reference in New Issue
Block a user