Speed up lexing of long JSON strings
Use optimized linear search when looking ahead for end quotes, backslashes, and non-printable characters. This results in nearly 40% faster JSON parsing on x86-64 when most values are long strings, and all platforms should see some improvement.

Reviewed by Andres Freund and Nathan Bossart
Discussion: https://www.postgresql.org/message-id/CAFBsxsGhaR2KQ5eisaK%3D6Vm60t%3DaxhD8Ckj1qFoCH1pktZi%2B2w%40mail.gmail.com
Discussion: https://www.postgresql.org/message-id/CAFBsxsESLUyJ5spfOSyPrOvKUEYYNqsBosue9SV1j8ecgNXSKA%40mail.gmail.com
This commit is contained in:
parent
05519126a0
commit
0a8de93a48
@ -19,6 +19,7 @@
|
|||||||
|
|
||||||
#include "common/jsonapi.h"
|
#include "common/jsonapi.h"
|
||||||
#include "mb/pg_wchar.h"
|
#include "mb/pg_wchar.h"
|
||||||
|
#include "port/pg_lfind.h"
|
||||||
|
|
||||||
#ifndef FRONTEND
|
#ifndef FRONTEND
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
@ -844,7 +845,7 @@ json_lex_string(JsonLexContext *lex)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char *p;
|
char *p = s;
|
||||||
|
|
||||||
if (hi_surrogate != -1)
|
if (hi_surrogate != -1)
|
||||||
return JSON_UNICODE_LOW_SURROGATE;
|
return JSON_UNICODE_LOW_SURROGATE;
|
||||||
@ -853,11 +854,17 @@ json_lex_string(JsonLexContext *lex)
|
|||||||
* Skip to the first byte that requires special handling, so we
|
* Skip to the first byte that requires special handling, so we
|
||||||
* can batch calls to appendBinaryStringInfo.
|
* can batch calls to appendBinaryStringInfo.
|
||||||
*/
|
*/
|
||||||
for (p = s; p < end; p++)
|
while (p < end - sizeof(Vector8) &&
|
||||||
|
!pg_lfind8('\\', (uint8 *) p, sizeof(Vector8)) &&
|
||||||
|
!pg_lfind8('"', (uint8 *) p, sizeof(Vector8)) &&
|
||||||
|
!pg_lfind8_le(31, (uint8 *) p, sizeof(Vector8)))
|
||||||
|
p += sizeof(Vector8);
|
||||||
|
|
||||||
|
for (; p < end; p++)
|
||||||
{
|
{
|
||||||
if (*p == '\\' || *p == '"')
|
if (*p == '\\' || *p == '"')
|
||||||
break;
|
break;
|
||||||
else if ((unsigned char) *p < 32)
|
else if ((unsigned char) *p <= 31)
|
||||||
{
|
{
|
||||||
/* Per RFC4627, these characters MUST be escaped. */
|
/* Per RFC4627, these characters MUST be escaped. */
|
||||||
/*
|
/*
|
||||||
|
@ -42,6 +42,19 @@ LINE 1: SELECT '"\v"'::json;
|
|||||||
^
|
^
|
||||||
DETAIL: Escape sequence "\v" is invalid.
|
DETAIL: Escape sequence "\v" is invalid.
|
||||||
CONTEXT: JSON data, line 1: "\v...
|
CONTEXT: JSON data, line 1: "\v...
|
||||||
|
-- Check fast path for longer strings (at least 16 bytes long)
|
||||||
|
SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
|
||||||
|
json
|
||||||
|
-------------------
|
||||||
|
"............abc"
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
|
||||||
|
json
|
||||||
|
---------------------
|
||||||
|
"............abc\n"
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- see json_encoding test for input with unicode escapes
|
-- see json_encoding test for input with unicode escapes
|
||||||
-- Numbers.
|
-- Numbers.
|
||||||
SELECT '1'::json; -- OK
|
SELECT '1'::json; -- OK
|
||||||
|
@ -7,6 +7,11 @@ SELECT '"abc
|
|||||||
def"'::json; -- ERROR, unescaped newline in string constant
|
def"'::json; -- ERROR, unescaped newline in string constant
|
||||||
SELECT '"\n\"\\"'::json; -- OK, legal escapes
|
SELECT '"\n\"\\"'::json; -- OK, legal escapes
|
||||||
SELECT '"\v"'::json; -- ERROR, not a valid JSON escape
|
SELECT '"\v"'::json; -- ERROR, not a valid JSON escape
|
||||||
|
|
||||||
|
-- Check fast path for longer strings (at least 16 bytes long)
|
||||||
|
SELECT ('"'||repeat('.', 12)||'abc"')::json; -- OK
|
||||||
|
SELECT ('"'||repeat('.', 12)||'abc\n"')::json; -- OK, legal escapes
|
||||||
|
|
||||||
-- see json_encoding test for input with unicode escapes
|
-- see json_encoding test for input with unicode escapes
|
||||||
|
|
||||||
-- Numbers.
|
-- Numbers.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user