mirror of https://github.com/postgres/postgres
Make websearch_to_tsquery() parse text in quotes as a single token
websearch_to_tsquery() splits text in quotes into tokens and connects them with
the phrase operator on its own. However, that leads to surprising results when
the token contains no words.
For instance, websearch_to_tsquery('"aaa: bbb"') is 'aaa <2> bbb', because
it is equivalent to to_tsquery(E'aaa <-> \':\' <-> bbb'). But
websearch_to_tsquery('"aaa: bbb"') has to be 'aaa <-> bbb' in order to match
to_tsvector('aaa: bbb').
Since 0c4f355c6a, we connect lexemes of complex tokens with phrase operators
anyway. Thus, let's just make websearch_to_tsquery() parse text in quotes as
a single token. Therefore, websearch_to_tsquery() should process the quoted
text in the same way phraseto_tsquery() does. This solution is exactly what we
need and also simplifies the code.
This commit is an incompatible change, so we don't backpatch it.
Reported-by: Valentin Gatien-Baron
Discussion: https://postgr.es/m/CA%2B0DEqiZs7gdOd4ikmg%3D0UWG%2BSwWOLxPsk_JW-sx9WNOyrb0KQ%40mail.gmail.com
Author: Alexander Korotkov
Reviewed-by: Tom Lane, Zhihong Yu
This commit is contained in:
parent
651d005e76
commit
eb086056fe
|
@ -77,7 +77,6 @@ struct TSQueryParserStateData
|
||||||
char *buf; /* current scan point */
|
char *buf; /* current scan point */
|
||||||
int count; /* nesting count, incremented by (,
|
int count; /* nesting count, incremented by (,
|
||||||
* decremented by ) */
|
* decremented by ) */
|
||||||
bool in_quotes; /* phrase in quotes "" */
|
|
||||||
ts_parserstate state;
|
ts_parserstate state;
|
||||||
|
|
||||||
/* polish (prefix) notation in list, filled in by push* functions */
|
/* polish (prefix) notation in list, filled in by push* functions */
|
||||||
|
@ -235,9 +234,6 @@ parse_or_operator(TSQueryParserState pstate)
|
||||||
{
|
{
|
||||||
char *ptr = pstate->buf;
|
char *ptr = pstate->buf;
|
||||||
|
|
||||||
if (pstate->in_quotes)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
/* it should begin with "OR" literal */
|
/* it should begin with "OR" literal */
|
||||||
if (pg_strncasecmp(ptr, "or", 2) != 0)
|
if (pg_strncasecmp(ptr, "or", 2) != 0)
|
||||||
return false;
|
return false;
|
||||||
|
@ -398,38 +394,29 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
||||||
state->buf++;
|
state->buf++;
|
||||||
state->state = WAITOPERAND;
|
state->state = WAITOPERAND;
|
||||||
|
|
||||||
if (state->in_quotes)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
*operator = OP_NOT;
|
*operator = OP_NOT;
|
||||||
return PT_OPR;
|
return PT_OPR;
|
||||||
}
|
}
|
||||||
else if (t_iseq(state->buf, '"'))
|
else if (t_iseq(state->buf, '"'))
|
||||||
{
|
{
|
||||||
|
/* Everything in quotes is processed as a single token */
|
||||||
|
|
||||||
|
/* skip opening quote */
|
||||||
state->buf++;
|
state->buf++;
|
||||||
|
*strval = state->buf;
|
||||||
|
|
||||||
if (!state->in_quotes)
|
/* iterate to the closing quote or end of the string */
|
||||||
{
|
while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
|
||||||
state->state = WAITOPERAND;
|
state->buf++;
|
||||||
|
*lenval = state->buf - *strval;
|
||||||
|
|
||||||
if (strchr(state->buf, '"'))
|
/* skip closing quote if not end of the string */
|
||||||
{
|
if (*state->buf != '\0')
|
||||||
/* quoted text should be ordered <-> */
|
state->buf++;
|
||||||
state->in_quotes = true;
|
|
||||||
return PT_OPEN;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* web search tolerates missing quotes */
|
state->state = WAITOPERATOR;
|
||||||
continue;
|
state->count++;
|
||||||
}
|
return PT_VAL;
|
||||||
else
|
|
||||||
{
|
|
||||||
/* we have to provide an operand */
|
|
||||||
state->in_quotes = false;
|
|
||||||
state->state = WAITOPERATOR;
|
|
||||||
pushStop(state);
|
|
||||||
return PT_CLOSE;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (ISOPERATOR(state->buf))
|
else if (ISOPERATOR(state->buf))
|
||||||
{
|
{
|
||||||
|
@ -467,24 +454,13 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
||||||
case WAITOPERATOR:
|
case WAITOPERATOR:
|
||||||
if (t_iseq(state->buf, '"'))
|
if (t_iseq(state->buf, '"'))
|
||||||
{
|
{
|
||||||
if (!state->in_quotes)
|
/*
|
||||||
{
|
* put implicit AND after an operand and handle this quote
|
||||||
/*
|
* in WAITOPERAND
|
||||||
* put implicit AND after an operand and handle this
|
*/
|
||||||
* quote in WAITOPERAND
|
state->state = WAITOPERAND;
|
||||||
*/
|
*operator = OP_AND;
|
||||||
state->state = WAITOPERAND;
|
return PT_OPR;
|
||||||
*operator = OP_AND;
|
|
||||||
return PT_OPR;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
state->buf++;
|
|
||||||
|
|
||||||
/* just close quotes */
|
|
||||||
state->in_quotes = false;
|
|
||||||
return PT_CLOSE;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (parse_or_operator(state))
|
else if (parse_or_operator(state))
|
||||||
{
|
{
|
||||||
|
@ -498,18 +474,8 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
||||||
}
|
}
|
||||||
else if (!t_isspace(state->buf))
|
else if (!t_isspace(state->buf))
|
||||||
{
|
{
|
||||||
if (state->in_quotes)
|
/* put implicit AND after an operand */
|
||||||
{
|
*operator = OP_AND;
|
||||||
/* put implicit <-> after an operand */
|
|
||||||
*operator = OP_PHRASE;
|
|
||||||
*weight = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* put implicit AND after an operand */
|
|
||||||
*operator = OP_AND;
|
|
||||||
}
|
|
||||||
|
|
||||||
state->state = WAITOPERAND;
|
state->state = WAITOPERAND;
|
||||||
return PT_OPR;
|
return PT_OPR;
|
||||||
}
|
}
|
||||||
|
@ -846,7 +812,6 @@ parse_tsquery(char *buf,
|
||||||
state.buffer = buf;
|
state.buffer = buf;
|
||||||
state.buf = buf;
|
state.buf = buf;
|
||||||
state.count = 0;
|
state.count = 0;
|
||||||
state.in_quotes = false;
|
|
||||||
state.state = WAITFIRSTOPERAND;
|
state.state = WAITFIRSTOPERAND;
|
||||||
state.polstr = NIL;
|
state.polstr = NIL;
|
||||||
|
|
||||||
|
|
|
@ -2678,9 +2678,9 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
|
||||||
|
|
||||||
-- test quotes
|
-- test quotes
|
||||||
select websearch_to_tsquery('english', '"pg_class pg');
|
select websearch_to_tsquery('english', '"pg_class pg');
|
||||||
websearch_to_tsquery
|
websearch_to_tsquery
|
||||||
-------------------------
|
---------------------------
|
||||||
'pg' <-> 'class' & 'pg'
|
'pg' <-> 'class' <-> 'pg'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
select websearch_to_tsquery('english', 'pg_class pg"');
|
select websearch_to_tsquery('english', 'pg_class pg"');
|
||||||
|
@ -2695,6 +2695,12 @@ select websearch_to_tsquery('english', '"pg_class pg"');
|
||||||
'pg' <-> 'class' <-> 'pg'
|
'pg' <-> 'class' <-> 'pg'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
select websearch_to_tsquery('english', '"pg_class : pg"');
|
||||||
|
websearch_to_tsquery
|
||||||
|
---------------------------
|
||||||
|
'pg' <-> 'class' <-> 'pg'
|
||||||
|
(1 row)
|
||||||
|
|
||||||
select websearch_to_tsquery('english', 'abc "pg_class pg"');
|
select websearch_to_tsquery('english', 'abc "pg_class pg"');
|
||||||
websearch_to_tsquery
|
websearch_to_tsquery
|
||||||
-----------------------------------
|
-----------------------------------
|
||||||
|
@ -2708,15 +2714,15 @@ select websearch_to_tsquery('english', '"pg_class pg" def');
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
|
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
|
||||||
websearch_to_tsquery
|
websearch_to_tsquery
|
||||||
--------------------------------------------------------
|
----------------------------------------------------
|
||||||
'abc' & 'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg' & 'def'
|
'abc' & 'pg' <-> 'pg' <-> 'class' <-> 'pg' & 'def'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
|
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
|
||||||
websearch_to_tsquery
|
websearch_to_tsquery
|
||||||
----------------------------------------
|
------------------------------------
|
||||||
'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg'
|
'pg' <-> 'pg' <-> 'class' <-> 'pg'
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
select websearch_to_tsquery('english', '""pg pg_class pg""');
|
select websearch_to_tsquery('english', '""pg pg_class pg""');
|
||||||
|
|
|
@ -759,6 +759,7 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
|
||||||
select websearch_to_tsquery('english', '"pg_class pg');
|
select websearch_to_tsquery('english', '"pg_class pg');
|
||||||
select websearch_to_tsquery('english', 'pg_class pg"');
|
select websearch_to_tsquery('english', 'pg_class pg"');
|
||||||
select websearch_to_tsquery('english', '"pg_class pg"');
|
select websearch_to_tsquery('english', '"pg_class pg"');
|
||||||
|
select websearch_to_tsquery('english', '"pg_class : pg"');
|
||||||
select websearch_to_tsquery('english', 'abc "pg_class pg"');
|
select websearch_to_tsquery('english', 'abc "pg_class pg"');
|
||||||
select websearch_to_tsquery('english', '"pg_class pg" def');
|
select websearch_to_tsquery('english', '"pg_class pg" def');
|
||||||
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
|
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
|
||||||
|
|
Loading…
Reference in New Issue