From 340db1ed82f8ced40a3e778c08963005369e2926 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Thu, 23 Aug 2018 18:39:45 +0200
Subject: [PATCH] json: Reject unescaped control characters

Fix the lexer to reject unescaped control characters in JSON strings,
in accordance with RFC 8259 "The JavaScript Object Notation (JSON)
Data Interchange Format".

Bonus: we now recover more nicely from unclosed strings.  E.g.

    {"one: 1}\n{"two": 2}

now recovers cleanly after the newline, where before the lexer
remained confused until the next unpaired double quote or lexical
error.

Signed-off-by: Markus Armbruster
Reviewed-by: Eric Blake
Message-Id: <20180823164025.12553-19-armbru@redhat.com>
---
 qobject/json-lexer.c | 4 ++--
 tests/check-qjson.c  | 6 +-----
 tests/qmp-test.c     | 4 ++--
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/qobject/json-lexer.c b/qobject/json-lexer.c
index 7c0875d225..e85e9a78ff 100644
--- a/qobject/json-lexer.c
+++ b/qobject/json-lexer.c
@@ -115,7 +115,7 @@ static const uint8_t json_lexer[][256] = {
         ['u'] = IN_DQ_UCODE0,
     },
     [IN_DQ_STRING] = {
-        [1 ... 0xBF] = IN_DQ_STRING,
+        [0x20 ... 0xBF] = IN_DQ_STRING,
         [0xC2 ... 0xF4] = IN_DQ_STRING,
         ['\\'] = IN_DQ_STRING_ESCAPE,
         ['"'] = JSON_STRING,
@@ -155,7 +155,7 @@ static const uint8_t json_lexer[][256] = {
         ['u'] = IN_SQ_UCODE0,
     },
     [IN_SQ_STRING] = {
-        [1 ... 0xBF] = IN_SQ_STRING,
+        [0x20 ... 0xBF] = IN_SQ_STRING,
         [0xC2 ... 0xF4] = IN_SQ_STRING,
         ['\\'] = IN_SQ_STRING_ESCAPE,
         ['\''] = JSON_STRING,
diff --git a/tests/check-qjson.c b/tests/check-qjson.c
index 1688b2f5c1..f1405ad47a 100644
--- a/tests/check-qjson.c
+++ b/tests/check-qjson.c
@@ -202,11 +202,7 @@ static void utf8_string(void)
             "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
             "\x10\x11\x12\x13\x14\x15\x16\x17"
             "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
-            /* bug: not corrected (valid UTF-8, but invalid JSON) */
-            "\x01\x02\x03\x04\x05\x06\x07"
-            "\x08\x09\x0A\x0B\x0C\x0D\x0E\x0F"
-            "\x10\x11\x12\x13\x14\x15\x16\x17"
-            "\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F",
+            NULL,
             "\\u0001\\u0002\\u0003\\u0004\\u0005\\u0006\\u0007"
             "\\b\\t\\n\\u000B\\f\\r\\u000E\\u000F"
             "\\u0010\\u0011\\u0012\\u0013\\u0014\\u0015\\u0016\\u0017"
diff --git a/tests/qmp-test.c b/tests/qmp-test.c
index 5edc97f63f..7b3ba17c4a 100644
--- a/tests/qmp-test.c
+++ b/tests/qmp-test.c
@@ -86,9 +86,9 @@ static void test_malformed(QTestState *qts)
     g_assert(recovered(qts));
 
     /* lexical error: control character in string */
-    qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n'}");
+    qtest_qmp_send_raw(qts, "{'execute': 'nonexistent', 'id':'\n");
     resp = qtest_qmp_receive(qts);
-    g_assert_cmpstr(get_error_class(resp), ==, "CommandNotFound"); /* BUG */
+    g_assert_cmpstr(get_error_class(resp), ==, "GenericError");
     qobject_unref(resp);
     g_assert(recovered(qts));
 