hstore: Tighten key/value parsing check for whitespace
isspace() can be locale-sensitive depending on the platform, causing hstore to treat as whitespace some characters that it should not. For example, U+0105, which is encoded as 0xC4 0x85 in UTF-8, would have its 0x85 byte interpreted as whitespace and discarded from the given input. This problem is similar to 9ae2661, though it was missed at the time that hstore can also manipulate non-ASCII inputs, so replace the existing isspace() calls with scanner_isspace(). This problem has existed for a long time, so backpatch all the way down. Author: Evan Jones Discussion: https://postgr.es/m/CA+HWA9awUW0+RV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig@mail.gmail.com Backpatch-through: 11
This commit is contained in:
parent
9920552e1e
commit
edf1de65e5
@ -15,7 +15,7 @@ PGFILEDESC = "hstore - key/value pair data type"
|
|||||||
|
|
||||||
HEADERS = hstore.h
|
HEADERS = hstore.h
|
||||||
|
|
||||||
REGRESS = hstore
|
REGRESS = hstore hstore_utf8
|
||||||
|
|
||||||
ifdef USE_PGXS
|
ifdef USE_PGXS
|
||||||
PG_CONFIG = pg_config
|
PG_CONFIG = pg_config
|
||||||
|
36
contrib/hstore/expected/hstore_utf8.out
Normal file
36
contrib/hstore/expected/hstore_utf8.out
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
/*
|
||||||
|
* This test must be run in a database with UTF-8 encoding,
|
||||||
|
* because other encodings don't support all the characters used.
|
||||||
|
*/
|
||||||
|
SELECT getdatabaseencoding() <> 'UTF8'
|
||||||
|
AS skip_test \gset
|
||||||
|
\if :skip_test
|
||||||
|
\quit
|
||||||
|
\endif
|
||||||
|
SET client_encoding = utf8;
|
||||||
|
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
|
||||||
|
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
|
||||||
|
SELECT E'key\u0105=>value\u0105'::hstore;
|
||||||
|
hstore
|
||||||
|
------------------
|
||||||
|
"keyą"=>"valueą"
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'keyą=>valueą'::hstore;
|
||||||
|
hstore
|
||||||
|
------------------
|
||||||
|
"keyą"=>"valueą"
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'ą=>ą'::hstore;
|
||||||
|
hstore
|
||||||
|
----------
|
||||||
|
"ą"=>"ą"
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT 'keyąfoo=>valueą'::hstore;
|
||||||
|
hstore
|
||||||
|
---------------------
|
||||||
|
"keyąfoo"=>"valueą"
|
||||||
|
(1 row)
|
||||||
|
|
8
contrib/hstore/expected/hstore_utf8_1.out
Normal file
8
contrib/hstore/expected/hstore_utf8_1.out
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
/*
|
||||||
|
* This test must be run in a database with UTF-8 encoding,
|
||||||
|
* because other encodings don't support all the characters used.
|
||||||
|
*/
|
||||||
|
SELECT getdatabaseencoding() <> 'UTF8'
|
||||||
|
AS skip_test \gset
|
||||||
|
\if :skip_test
|
||||||
|
\quit
|
@ -10,6 +10,7 @@
|
|||||||
#include "funcapi.h"
|
#include "funcapi.h"
|
||||||
#include "lib/stringinfo.h"
|
#include "lib/stringinfo.h"
|
||||||
#include "libpq/pqformat.h"
|
#include "libpq/pqformat.h"
|
||||||
|
#include "parser/scansup.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "utils/json.h"
|
#include "utils/json.h"
|
||||||
#include "utils/jsonapi.h"
|
#include "utils/jsonapi.h"
|
||||||
@ -87,7 +88,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
|
|||||||
{
|
{
|
||||||
st = GV_WAITESCIN;
|
st = GV_WAITESCIN;
|
||||||
}
|
}
|
||||||
else if (!isspace((unsigned char) *(state->ptr)))
|
else if (!scanner_isspace((unsigned char) *(state->ptr)))
|
||||||
{
|
{
|
||||||
*(state->cur) = *(state->ptr);
|
*(state->cur) = *(state->ptr);
|
||||||
state->cur++;
|
state->cur++;
|
||||||
@ -110,7 +111,7 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
|
|||||||
state->ptr--;
|
state->ptr--;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (isspace((unsigned char) *(state->ptr)))
|
else if (scanner_isspace((unsigned char) *(state->ptr)))
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -218,7 +219,7 @@ parse_hstore(HSParser *state)
|
|||||||
{
|
{
|
||||||
elog(ERROR, "Unexpected end of string");
|
elog(ERROR, "Unexpected end of string");
|
||||||
}
|
}
|
||||||
else if (!isspace((unsigned char) *(state->ptr)))
|
else if (!scanner_isspace((unsigned char) *(state->ptr)))
|
||||||
{
|
{
|
||||||
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
|
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
|
||||||
}
|
}
|
||||||
@ -266,7 +267,7 @@ parse_hstore(HSParser *state)
|
|||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else if (!isspace((unsigned char) *(state->ptr)))
|
else if (!scanner_isspace((unsigned char) *(state->ptr)))
|
||||||
{
|
{
|
||||||
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
|
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin));
|
||||||
}
|
}
|
||||||
|
19
contrib/hstore/sql/hstore_utf8.sql
Normal file
19
contrib/hstore/sql/hstore_utf8.sql
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
/*
|
||||||
|
* This test must be run in a database with UTF-8 encoding,
|
||||||
|
* because other encodings don't support all the characters used.
|
||||||
|
*/
|
||||||
|
|
||||||
|
SELECT getdatabaseencoding() <> 'UTF8'
|
||||||
|
AS skip_test \gset
|
||||||
|
\if :skip_test
|
||||||
|
\quit
|
||||||
|
\endif
|
||||||
|
|
||||||
|
SET client_encoding = utf8;
|
||||||
|
|
||||||
|
-- UTF-8 locale bug on macOS: isspace(0x85) returns true. \u0105 encodes
|
||||||
|
-- as 0xc4 0x85 in UTF-8; the 0x85 was interpreted here as a whitespace.
|
||||||
|
SELECT E'key\u0105=>value\u0105'::hstore;
|
||||||
|
SELECT 'keyą=>valueą'::hstore;
|
||||||
|
SELECT 'ą=>ą'::hstore;
|
||||||
|
SELECT 'keyąfoo=>valueą'::hstore;
|
Loading…
x
Reference in New Issue
Block a user