mirror of https://github.com/postgres/postgres
Preserve integer and float values accurately in (de)serialize_deflist.
Previously, this code just smashed all types of DefElem values to strings, cavalierly reasoning that nobody would care. But in point of fact, most of the defGetFoo functions do distinguish among different input syntaxes; for instance defGetBoolean will accept 1 as an integer but not "1" as a string. This led to CREATE/ALTER TEXT SEARCH DICTIONARY accepting 0 and 1 as values for boolean dictionary properties, only to have the dictionary fail at runtime. We can upgrade this behavior by teaching serialize_deflist that it does not need to quote T_Integer or T_Float nodes' values on output, and then teaching deserialize_deflist to restore unquoted integer or float values as the appropriate node type. This should not break anything using pg_ts_dict.dictinitoption, since that field is just defined as being something valid to include in CREATE TEXT SEARCH DICTIONARY. deserialize_deflist is also used to parse the options arguments for the ts_headline family of functions, but so far as I can see this won't cause any problems there either: the only consumer of that output is prsd_headline which always uses defGetString. (Really that's a bad idea, but I won't risk changing it here.) This is surely a bug fix, but given the lack of field complaints I don't think it's necessary to back-patch. Discussion: https://postgr.es/m/CAMkU=1xRcs_BUPzR0+V3WndaCAv0E_m3h6aUEJ8NF-sY1nnHsw@mail.gmail.com
This commit is contained in:
parent
40b3e2c201
commit
d01f03a495
|
@ -300,8 +300,10 @@ select ts_lexize('intdict', '314532610153');
|
|||
{314532}
|
||||
(1 row)
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648);
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648); -- fail
|
||||
ERROR: maxlen value has to be >= 1
|
||||
-- This ought to fail, perhaps, but historically it has not:
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 6.7);
|
||||
select ts_lexize('intdict', '-40865854');
|
||||
ts_lexize
|
||||
-----------
|
||||
|
@ -327,3 +329,28 @@ select ts_lexize('intdict', '+40865854');
|
|||
{408658}
|
||||
(1 row)
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (REJECTLONG = 1);
|
||||
select ts_lexize('intdict', '-40865854');
|
||||
ts_lexize
|
||||
-----------
|
||||
{}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '-4086585');
|
||||
ts_lexize
|
||||
-----------
|
||||
{}
|
||||
(1 row)
|
||||
|
||||
select ts_lexize('intdict', '-408658');
|
||||
ts_lexize
|
||||
-----------
|
||||
{408658}
|
||||
(1 row)
|
||||
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'intdict';
|
||||
dictinitoption
|
||||
-----------------------------------------------
|
||||
maxlen = 6.7, absval = 'true', rejectlong = 1
|
||||
(1 row)
|
||||
|
||||
|
|
|
@ -52,10 +52,18 @@ select ts_lexize('intdict', '313425');
|
|||
select ts_lexize('intdict', '641439323669');
|
||||
select ts_lexize('intdict', '314532610153');
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648);
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = -214783648); -- fail
|
||||
-- This ought to fail, perhaps, but historically it has not:
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (MAXLEN = 6.7);
|
||||
|
||||
select ts_lexize('intdict', '-40865854');
|
||||
select ts_lexize('intdict', '+40865854');
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (ABSVAL = true);
|
||||
select ts_lexize('intdict', '-40865854');
|
||||
select ts_lexize('intdict', '+40865854');
|
||||
ALTER TEXT SEARCH DICTIONARY intdict (REJECTLONG = 1);
|
||||
select ts_lexize('intdict', '-40865854');
|
||||
select ts_lexize('intdict', '-4086585');
|
||||
select ts_lexize('intdict', '-408658');
|
||||
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'intdict';
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
#include "commands/alter.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "commands/event_trigger.h"
|
||||
#include "common/string.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "parser/parse_func.h"
|
||||
|
@ -52,6 +53,8 @@ static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
|
|||
HeapTuple tup, Relation relMap);
|
||||
static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
|
||||
HeapTuple tup, Relation relMap);
|
||||
static DefElem *buildDefItem(const char *name, const char *val,
|
||||
bool was_quoted);
|
||||
|
||||
|
||||
/* --------------------- TS Parser commands ------------------------ */
|
||||
|
@ -1519,9 +1522,6 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
|
|||
* For the convenience of pg_dump, the output is formatted exactly as it
|
||||
* would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the
|
||||
* same options.
|
||||
*
|
||||
* Note that we assume that only the textual representation of an option's
|
||||
* value is interesting --- hence, non-string DefElems get forced to strings.
|
||||
*/
|
||||
text *
|
||||
serialize_deflist(List *deflist)
|
||||
|
@ -1539,19 +1539,30 @@ serialize_deflist(List *deflist)
|
|||
|
||||
appendStringInfo(&buf, "%s = ",
|
||||
quote_identifier(defel->defname));
|
||||
/* If backslashes appear, force E syntax to determine their handling */
|
||||
if (strchr(val, '\\'))
|
||||
appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
|
||||
appendStringInfoChar(&buf, '\'');
|
||||
while (*val)
|
||||
{
|
||||
char ch = *val++;
|
||||
|
||||
if (SQL_STR_DOUBLE(ch, true))
|
||||
/*
|
||||
* If the value is a T_Integer or T_Float, emit it without quotes,
|
||||
* otherwise with quotes. This is essential to allow correct
|
||||
* reconstruction of the node type as well as the value.
|
||||
*/
|
||||
if (IsA(defel->arg, Integer) || IsA(defel->arg, Float))
|
||||
appendStringInfoString(&buf, val);
|
||||
else
|
||||
{
|
||||
/* If backslashes appear, force E syntax to quote them safely */
|
||||
if (strchr(val, '\\'))
|
||||
appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
|
||||
appendStringInfoChar(&buf, '\'');
|
||||
while (*val)
|
||||
{
|
||||
char ch = *val++;
|
||||
|
||||
if (SQL_STR_DOUBLE(ch, true))
|
||||
appendStringInfoChar(&buf, ch);
|
||||
appendStringInfoChar(&buf, ch);
|
||||
appendStringInfoChar(&buf, ch);
|
||||
}
|
||||
appendStringInfoChar(&buf, '\'');
|
||||
}
|
||||
appendStringInfoChar(&buf, '\'');
|
||||
if (lnext(deflist, l) != NULL)
|
||||
appendStringInfoString(&buf, ", ");
|
||||
}
|
||||
|
@ -1566,7 +1577,7 @@ serialize_deflist(List *deflist)
|
|||
*
|
||||
* This is also used for prsheadline options, so for backward compatibility
|
||||
* we need to accept a few things serialize_deflist() will never emit:
|
||||
* in particular, unquoted and double-quoted values.
|
||||
* in particular, unquoted and double-quoted strings.
|
||||
*/
|
||||
List *
|
||||
deserialize_deflist(Datum txt)
|
||||
|
@ -1694,8 +1705,9 @@ deserialize_deflist(Datum txt)
|
|||
{
|
||||
*wsptr++ = '\0';
|
||||
result = lappend(result,
|
||||
makeDefElem(pstrdup(workspace),
|
||||
(Node *) makeString(pstrdup(startvalue)), -1));
|
||||
buildDefItem(workspace,
|
||||
startvalue,
|
||||
true));
|
||||
state = CS_WAITKEY;
|
||||
}
|
||||
}
|
||||
|
@ -1726,8 +1738,9 @@ deserialize_deflist(Datum txt)
|
|||
{
|
||||
*wsptr++ = '\0';
|
||||
result = lappend(result,
|
||||
makeDefElem(pstrdup(workspace),
|
||||
(Node *) makeString(pstrdup(startvalue)), -1));
|
||||
buildDefItem(workspace,
|
||||
startvalue,
|
||||
true));
|
||||
state = CS_WAITKEY;
|
||||
}
|
||||
}
|
||||
|
@ -1741,8 +1754,9 @@ deserialize_deflist(Datum txt)
|
|||
{
|
||||
*wsptr++ = '\0';
|
||||
result = lappend(result,
|
||||
makeDefElem(pstrdup(workspace),
|
||||
(Node *) makeString(pstrdup(startvalue)), -1));
|
||||
buildDefItem(workspace,
|
||||
startvalue,
|
||||
false));
|
||||
state = CS_WAITKEY;
|
||||
}
|
||||
else
|
||||
|
@ -1760,8 +1774,9 @@ deserialize_deflist(Datum txt)
|
|||
{
|
||||
*wsptr++ = '\0';
|
||||
result = lappend(result,
|
||||
makeDefElem(pstrdup(workspace),
|
||||
(Node *) makeString(pstrdup(startvalue)), -1));
|
||||
buildDefItem(workspace,
|
||||
startvalue,
|
||||
false));
|
||||
}
|
||||
else if (state != CS_WAITKEY)
|
||||
ereport(ERROR,
|
||||
|
@ -1773,3 +1788,36 @@ deserialize_deflist(Datum txt)
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Build one DefElem for deserialize_deflist
|
||||
*/
|
||||
static DefElem *
|
||||
buildDefItem(const char *name, const char *val, bool was_quoted)
|
||||
{
|
||||
/* If input was quoted, always emit as string */
|
||||
if (!was_quoted && val[0] != '\0')
|
||||
{
|
||||
int v;
|
||||
char *endptr;
|
||||
|
||||
/* Try to parse as an integer */
|
||||
errno = 0;
|
||||
v = strtoint(val, &endptr, 10);
|
||||
if (errno == 0 && *endptr == '\0')
|
||||
return makeDefElem(pstrdup(name),
|
||||
(Node *) makeInteger(v),
|
||||
-1);
|
||||
/* Nope, how about as a float? */
|
||||
errno = 0;
|
||||
(void) strtod(val, &endptr);
|
||||
if (errno == 0 && *endptr == '\0')
|
||||
return makeDefElem(pstrdup(name),
|
||||
(Node *) makeFloat(pstrdup(val)),
|
||||
-1);
|
||||
}
|
||||
/* Just make it a string */
|
||||
return makeDefElem(pstrdup(name),
|
||||
(Node *) makeString(pstrdup(val)),
|
||||
-1);
|
||||
}
|
||||
|
|
|
@ -470,6 +470,41 @@ SELECT ts_lexize('synonym', 'indices');
|
|||
{index}
|
||||
(1 row)
|
||||
|
||||
-- test altering boolean parameters
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
||||
dictinitoption
|
||||
-----------------------------
|
||||
synonyms = 'synonym_sample'
|
||||
(1 row)
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
|
||||
SELECT ts_lexize('synonym', 'PoStGrEs');
|
||||
ts_lexize
|
||||
-----------
|
||||
|
||||
(1 row)
|
||||
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
||||
dictinitoption
|
||||
------------------------------------------------
|
||||
synonyms = 'synonym_sample', casesensitive = 1
|
||||
(1 row)
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
|
||||
ERROR: casesensitive requires a Boolean value
|
||||
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
|
||||
SELECT ts_lexize('synonym', 'PoStGrEs');
|
||||
ts_lexize
|
||||
-----------
|
||||
{pgsql}
|
||||
(1 row)
|
||||
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
||||
dictinitoption
|
||||
----------------------------------------------------
|
||||
synonyms = 'synonym_sample', casesensitive = 'off'
|
||||
(1 row)
|
||||
|
||||
-- Create and simple test thesaurus dictionary
|
||||
-- More tests in configuration checks because ts_lexize()
|
||||
-- cannot pass more than one word to thesaurus.
|
||||
|
|
|
@ -148,6 +148,19 @@ SELECT ts_lexize('synonym', 'PoStGrEs');
|
|||
SELECT ts_lexize('synonym', 'Gogle');
|
||||
SELECT ts_lexize('synonym', 'indices');
|
||||
|
||||
-- test altering boolean parameters
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
|
||||
SELECT ts_lexize('synonym', 'PoStGrEs');
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
|
||||
|
||||
ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
|
||||
SELECT ts_lexize('synonym', 'PoStGrEs');
|
||||
SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
|
||||
|
||||
-- Create and simple test thesaurus dictionary
|
||||
-- More tests in configuration checks because ts_lexize()
|
||||
-- cannot pass more than one word to thesaurus.
|
||||
|
|
Loading…
Reference in New Issue