Fix shared tsvector/tsquery input code so that we don't say "syntax error in
tsvector" when we are really parsing a tsquery. Report the bogus input, too. Make styles of some related error messages more consistent.
This commit is contained in:
parent
dfc6f130b4
commit
1ea47dd8cb
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.7 2007/09/11 16:01:40 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.8 2007/10/21 22:29:56 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -141,7 +141,7 @@ gettoken_query(TSQueryParserState state,
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error at start of operand in tsearch query: \"%s\"",
|
||||
errmsg("syntax error in tsquery: \"%s\"",
|
||||
state->buffer)));
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
@ -159,7 +159,7 @@ gettoken_query(TSQueryParserState state,
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("no operand in tsearch query: \"%s\"",
|
||||
errmsg("no operand in tsquery: \"%s\"",
|
||||
state->buffer)));
|
||||
}
|
||||
break;
|
||||
@ -232,12 +232,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
||||
if (distance >= MAXSTRPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("value is too big in tsearch query: \"%s\"",
|
||||
errmsg("value is too big in tsquery: \"%s\"",
|
||||
state->buffer)));
|
||||
if (lenval >= MAXSTRLEN)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("operand is too long in tsearch query: \"%s\"",
|
||||
errmsg("operand is too long in tsquery: \"%s\"",
|
||||
state->buffer)));
|
||||
|
||||
tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
|
||||
@ -264,7 +264,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
|
||||
if (lenval >= MAXSTRLEN)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long in tsearch query: \"%s\"",
|
||||
errmsg("word is too long in tsquery: \"%s\"",
|
||||
state->buffer)));
|
||||
|
||||
INIT_CRC32(valcrc);
|
||||
@ -372,7 +372,7 @@ makepol(TSQueryParserState state,
|
||||
default:
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsearch query: \"%s\"",
|
||||
errmsg("syntax error in tsquery: \"%s\"",
|
||||
state->buffer)));
|
||||
}
|
||||
}
|
||||
@ -478,7 +478,7 @@ parse_tsquery(char *buf,
|
||||
state.polstr = NIL;
|
||||
|
||||
/* init value parser's state */
|
||||
state.valstate = init_tsvector_parser(NULL, true);
|
||||
state.valstate = init_tsvector_parser(state.buffer, true, true);
|
||||
|
||||
/* init list of operand */
|
||||
state.sumlen = 0;
|
||||
|
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -204,7 +204,7 @@ tsvectorin(PG_FUNCTION_ARGS)
|
||||
|
||||
pg_verifymbstr(buf, strlen(buf), false);
|
||||
|
||||
state = init_tsvector_parser(buf, false);
|
||||
state = init_tsvector_parser(buf, false, false);
|
||||
|
||||
arrlen = 64;
|
||||
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
|
||||
@ -224,7 +224,7 @@ tsvectorin(PG_FUNCTION_ARGS)
|
||||
if (cur - tmpbuf > MAXSTRPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("position value too large")));
|
||||
errmsg("position value is too large")));
|
||||
|
||||
/*
|
||||
* Enlarge buffers if needed
|
||||
@ -496,7 +496,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
|
||||
datalen += lex_len;
|
||||
|
||||
if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0)
|
||||
elog(ERROR, "lexemes are unordered");
|
||||
elog(ERROR, "lexemes are misordered");
|
||||
|
||||
/* Receive positions */
|
||||
|
||||
@ -523,7 +523,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
|
||||
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
|
||||
elog(ERROR, "position information is unordered");
|
||||
elog(ERROR, "position information is misordered");
|
||||
}
|
||||
|
||||
datalen += (npos + 1) * sizeof(WordEntry);
|
||||
|
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.2 2007/10/21 22:29:56 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -20,35 +20,49 @@
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
/*
|
||||
* Private state of tsvector parser. Note that tsquery also uses this code to
|
||||
* parse its input, hence the boolean flags. The two flags are both true or
|
||||
* both false in current usage, but we keep them separate for clarity.
|
||||
* is_tsquery affects *only* the content of error messages.
|
||||
*/
|
||||
struct TSVectorParseStateData
|
||||
{
|
||||
char *prsbuf;
|
||||
char *word; /* buffer to hold the current word */
|
||||
int len; /* size in bytes allocated for 'word' */
|
||||
bool oprisdelim;
|
||||
char *prsbuf; /* next input character */
|
||||
char *bufstart; /* whole string (used only for errors) */
|
||||
char *word; /* buffer to hold the current word */
|
||||
int len; /* size in bytes allocated for 'word' */
|
||||
int eml; /* max bytes per character */
|
||||
bool oprisdelim; /* treat ! | & ( ) as delimiters? */
|
||||
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* Initializes parser for the input string. If oprisdelim is set, the
|
||||
* following characters are treated as delimiters in addition to whitespace:
|
||||
* ! | & ( )
|
||||
*/
|
||||
TSVectorParseState
|
||||
init_tsvector_parser(char *input, bool oprisdelim)
|
||||
init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
|
||||
{
|
||||
TSVectorParseState state;
|
||||
|
||||
state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
|
||||
state->prsbuf = input;
|
||||
state->bufstart = input;
|
||||
state->len = 32;
|
||||
state->word = (char *) palloc(state->len);
|
||||
state->eml = pg_database_encoding_max_length();
|
||||
state->oprisdelim = oprisdelim;
|
||||
state->is_tsquery = is_tsquery;
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reinitializes parser for parsing 'input', instead of previous input.
|
||||
* Reinitializes parser to parse 'input', instead of previous input.
|
||||
*/
|
||||
void
|
||||
reset_tsvector_parser(TSVectorParseState state, char *input)
|
||||
@ -66,21 +80,21 @@ close_tsvector_parser(TSVectorParseState state)
|
||||
pfree(state);
|
||||
}
|
||||
|
||||
/* increase the size of 'word' if needed to hold one more character */
|
||||
#define RESIZEPRSBUF \
|
||||
do { \
|
||||
if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
|
||||
int clen = curpos - state->word; \
|
||||
if ( clen + state->eml >= state->len ) \
|
||||
{ \
|
||||
int clen = curpos - state->word; \
|
||||
state->len *= 2; \
|
||||
state->word = (char*)repalloc( (void*)state->word, state->len ); \
|
||||
state->word = (char *) repalloc(state->word, state->len); \
|
||||
curpos = state->word + clen; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
|
||||
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
|
||||
|
||||
/* Fills the output parameters, and returns true */
|
||||
/* Fills gettoken_tsvector's output parameters, and returns true */
|
||||
#define RETURN_TOKEN \
|
||||
do { \
|
||||
if (pos_ptr != NULL) \
|
||||
@ -111,18 +125,34 @@ do { \
|
||||
#define WAITPOSDELIM 7
|
||||
#define WAITCHARCMPLX 8
|
||||
|
||||
#define PRSSYNTAXERROR prssyntaxerror(state)
|
||||
|
||||
static void
|
||||
prssyntaxerror(TSVectorParseState state)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
state->is_tsquery ?
|
||||
errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
|
||||
errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Get next token from string being parsed. Returns false if
|
||||
* end of input string is reached, otherwise strval, lenval, pos_ptr
|
||||
* and poslen output parameters are filled in:
|
||||
* Get next token from string being parsed. Returns true if successful,
|
||||
* false if end of input string is reached. On success, these output
|
||||
* parameters are filled in:
|
||||
*
|
||||
* *strval token
|
||||
* *lenval length of*strval
|
||||
* *strval pointer to token
|
||||
* *lenval length of *strval
|
||||
* *pos_ptr pointer to a palloc'd array of positions and weights
|
||||
* associated with the token. If the caller is not interested
|
||||
* in the information, NULL can be supplied. Otherwise
|
||||
* the caller is responsible for pfreeing the array.
|
||||
* *poslen number of elements in *pos_ptr
|
||||
* *endptr scan resumption point
|
||||
*
|
||||
* Pass NULL for unwanted output parameters.
|
||||
*/
|
||||
bool
|
||||
gettoken_tsvector(TSVectorParseState state,
|
||||
@ -155,9 +185,7 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
oldstate = WAITENDWORD;
|
||||
}
|
||||
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
else if (!t_isspace(state->prsbuf))
|
||||
{
|
||||
COPYCHAR(curpos, state->prsbuf);
|
||||
@ -170,7 +198,8 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
if (*(state->prsbuf) == '\0')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("there is no escaped character")));
|
||||
errmsg("there is no escaped character: \"%s\"",
|
||||
state->bufstart)));
|
||||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
@ -192,18 +221,14 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
if (curpos == state->word)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
*(curpos) = '\0';
|
||||
RETURN_TOKEN;
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, ':'))
|
||||
{
|
||||
if (curpos == state->word)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
*(curpos) = '\0';
|
||||
if (state->oprisdelim)
|
||||
RETURN_TOKEN;
|
||||
@ -229,9 +254,7 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
oldstate = WAITENDCMPLX;
|
||||
}
|
||||
else if (*(state->prsbuf) == '\0')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
@ -253,9 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
RESIZEPRSBUF;
|
||||
*(curpos) = '\0';
|
||||
if (curpos == state->word)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
if (state->oprisdelim)
|
||||
{
|
||||
/* state->prsbuf+=pg_mblen(state->prsbuf); */
|
||||
@ -290,17 +311,17 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
}
|
||||
npos++;
|
||||
WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
|
||||
/* we cannot get here in tsquery, so no need for 2 errmsgs */
|
||||
if (WEP_GETPOS(pos[npos - 1]) == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("wrong position info in tsvector")));
|
||||
errmsg("wrong position info in tsvector: \"%s\"",
|
||||
state->bufstart)));
|
||||
WEP_SETWEIGHT(pos[npos - 1], 0);
|
||||
statecode = WAITPOSDELIM;
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
}
|
||||
else if (statecode == WAITPOSDELIM)
|
||||
{
|
||||
@ -309,42 +330,32 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(pos[npos - 1]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
WEP_SETWEIGHT(pos[npos - 1], 3);
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(pos[npos - 1]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
WEP_SETWEIGHT(pos[npos - 1], 2);
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(pos[npos - 1]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
WEP_SETWEIGHT(pos[npos - 1], 1);
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(pos[npos - 1]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
WEP_SETWEIGHT(pos[npos - 1], 0);
|
||||
}
|
||||
else if (t_isspace(state->prsbuf) ||
|
||||
*(state->prsbuf) == '\0')
|
||||
RETURN_TOKEN;
|
||||
else if (!t_isdigit(state->prsbuf))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
PRSSYNTAXERROR;
|
||||
}
|
||||
else /* internal error */
|
||||
elog(ERROR, "internal error in gettoken_tsvector");
|
||||
|
@ -5,7 +5,7 @@
|
||||
*
|
||||
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.5 2007/10/19 22:01:45 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.6 2007/10/21 22:29:56 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -22,10 +22,12 @@
|
||||
|
||||
/* tsvector parser support. */
|
||||
|
||||
struct TSVectorParseStateData;
|
||||
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
|
||||
typedef struct TSVectorParseStateData *TSVectorParseState;
|
||||
|
||||
extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
|
||||
extern TSVectorParseState init_tsvector_parser(char *input,
|
||||
bool oprisdelim,
|
||||
bool is_tsquery);
|
||||
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
|
||||
extern bool gettoken_tsvector(TSVectorParseState state,
|
||||
char **token, int *len,
|
||||
|
Loading…
x
Reference in New Issue
Block a user