fb8e71c584
FossilOrigin-Name: 7b7d31a6153b1505288eb3e849d0d9ef9e88e961c7b2f918ef5582fd77990f6d
1294 lines
40 KiB
C
1294 lines
40 KiB
C
/*
|
|
** 2008 Nov 28
|
|
**
|
|
** The author disclaims copyright to this source code. In place of
|
|
** a legal notice, here is a blessing:
|
|
**
|
|
** May you do good and not evil.
|
|
** May you find forgiveness for yourself and forgive others.
|
|
** May you share freely, never taking more than you give.
|
|
**
|
|
******************************************************************************
|
|
**
|
|
** This module contains code that implements a parser for fts3 query strings
|
|
** (the right-hand argument to the MATCH operator). Because the supported
|
|
** syntax is relatively simple, the whole tokenizer/parser system is
|
|
** hand-coded.
|
|
*/
|
|
#include "fts3Int.h"
|
|
#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
|
|
|
|
/*
|
|
** By default, this module parses the legacy syntax that has been
|
|
** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS
|
|
** is defined, then it uses the new syntax. The differences between
|
|
** the new and the old syntaxes are:
|
|
**
|
|
** a) The new syntax supports parenthesis. The old does not.
|
|
**
|
|
** b) The new syntax supports the AND and NOT operators. The old does not.
|
|
**
|
|
** c) The old syntax supports the "-" token qualifier. This is not
|
|
** supported by the new syntax (it is replaced by the NOT operator).
|
|
**
|
|
** d) When using the old syntax, the OR operator has a greater precedence
|
|
** than an implicit AND. When using the new, both implicit and explicit
|
|
** AND operators have a higher precedence than OR.
|
|
**
|
|
** If compiled with SQLITE_TEST defined, then this module exports the
|
|
** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable
|
|
** to zero causes the module to use the old syntax. If it is set to
|
|
** non-zero the new syntax is activated. This is so both syntaxes can
|
|
** be tested using a single build of testfixture.
|
|
**
|
|
** The following describes the syntax supported by the fts3 MATCH
|
|
** operator in a similar format to that used by the lemon parser
|
|
** generator. This module does not actually use lemon, it uses a
|
|
** custom parser.
|
|
**
|
|
** query ::= andexpr (OR andexpr)*.
|
|
**
|
|
** andexpr ::= notexpr (AND? notexpr)*.
|
|
**
|
|
** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*.
|
|
** notexpr ::= LP query RP.
|
|
**
|
|
** nearexpr ::= phrase (NEAR distance_opt nearexpr)*.
|
|
**
|
|
** distance_opt ::= .
|
|
** distance_opt ::= / INTEGER.
|
|
**
|
|
** phrase ::= TOKEN.
|
|
** phrase ::= COLUMN:TOKEN.
|
|
** phrase ::= "TOKEN TOKEN TOKEN...".
|
|
*/
|
|
|
|
/*
** Select the query syntax. In SQLITE_TEST builds the choice is made at
** run-time through a global variable (initially 0, the legacy syntax).
** Otherwise it is a compile-time constant: 1 if
** SQLITE_ENABLE_FTS3_PARENTHESIS is defined, 0 if it is not.
*/
#if defined(SQLITE_TEST)
int sqlite3_fts3_enable_parentheses = 0;
#elif defined(SQLITE_ENABLE_FTS3_PARENTHESIS)
# define sqlite3_fts3_enable_parentheses 1
#else
# define sqlite3_fts3_enable_parentheses 0
#endif
|
|
|
|
/*
|
|
** Default span for NEAR operators.
|
|
*/
|
|
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
|
|
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
/*
|
|
** isNot:
|
|
** This variable is used by function getNextNode(). When getNextNode() is
|
|
** called, it sets ParseContext.isNot to true if the 'next node' is a
|
|
** FTSQUERY_PHRASE with a unary "-" attached to it. i.e. "mysql" in the
|
|
** FTS3 query "sqlite -mysql". Otherwise, ParseContext.isNot is set to
|
|
** zero.
|
|
*/
|
|
typedef struct ParseContext ParseContext;
|
|
struct ParseContext {
|
|
sqlite3_tokenizer *pTokenizer; /* Tokenizer module */
|
|
int iLangid; /* Language id used with tokenizer */
|
|
const char **azCol; /* Array of column names for fts3 table */
|
|
int bFts4; /* True to allow FTS4-only syntax */
|
|
int nCol; /* Number of entries in azCol[] */
|
|
int iDefaultCol; /* Default column to query */
|
|
int isNot; /* True if getNextNode() sees a unary - */
|
|
sqlite3_context *pCtx; /* Write error message here */
|
|
int nNest; /* Number of nested brackets */
|
|
};
|
|
|
|
/*
** A locale-independent replacement for the standard isspace().
**
** The library isspace() takes an int, and its behaviour is undefined for
** values that cannot be represented as an unsigned char. This version
** takes a plain char and returns 1 for exactly the six ASCII whitespace
** characters (space, \t, \n, \r, \v, \f) and 0 for every other value,
** including negative ones.
*/
static int fts3isspace(char c){
  switch( c ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
      return 1;
    default:
      return 0;
  }
}
|
|
|
|
/*
|
|
** Allocate nByte bytes of memory using sqlite3_malloc(). If successful,
|
|
** zero the memory before returning a pointer to it. If unsuccessful,
|
|
** return NULL.
|
|
*/
|
|
void *sqlite3Fts3MallocZero(sqlite3_int64 nByte){
|
|
void *pRet = sqlite3_malloc64(nByte);
|
|
if( pRet ) memset(pRet, 0, nByte);
|
|
return pRet;
|
|
}
|
|
|
|
int sqlite3Fts3OpenTokenizer(
|
|
sqlite3_tokenizer *pTokenizer,
|
|
int iLangid,
|
|
const char *z,
|
|
int n,
|
|
sqlite3_tokenizer_cursor **ppCsr
|
|
){
|
|
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
|
|
sqlite3_tokenizer_cursor *pCsr = 0;
|
|
int rc;
|
|
|
|
rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
|
|
assert( rc==SQLITE_OK || pCsr==0 );
|
|
if( rc==SQLITE_OK ){
|
|
pCsr->pTokenizer = pTokenizer;
|
|
if( pModule->iVersion>=1 ){
|
|
rc = pModule->xLanguageid(pCsr, iLangid);
|
|
if( rc!=SQLITE_OK ){
|
|
pModule->xClose(pCsr);
|
|
pCsr = 0;
|
|
}
|
|
}
|
|
}
|
|
*ppCsr = pCsr;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Function getNextNode(), which is called by fts3ExprParse(), may itself
|
|
** call fts3ExprParse(). So this forward declaration is required.
|
|
*/
|
|
static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *);
|
|
|
|
/*
|
|
** Extract the next token from buffer z (length n) using the tokenizer
|
|
** and other information (column names etc.) in pParse. Create an Fts3Expr
|
|
** structure of type FTSQUERY_PHRASE containing a phrase consisting of this
|
|
** single token and set *ppExpr to point to it. If the end of the buffer is
|
|
** reached before a token is found, set *ppExpr to zero. It is the
|
|
** responsibility of the caller to eventually deallocate the allocated
|
|
** Fts3Expr structure (if any) by passing it to sqlite3_free().
|
|
**
|
|
** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation
|
|
** fails.
|
|
*/
|
|
static int getNextToken(
|
|
ParseContext *pParse, /* fts3 query parse context */
|
|
int iCol, /* Value for Fts3Phrase.iColumn */
|
|
const char *z, int n, /* Input string */
|
|
Fts3Expr **ppExpr, /* OUT: expression */
|
|
int *pnConsumed /* OUT: Number of bytes consumed */
|
|
){
|
|
sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
|
|
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
|
|
int rc;
|
|
sqlite3_tokenizer_cursor *pCursor;
|
|
Fts3Expr *pRet = 0;
|
|
int i = 0;
|
|
|
|
/* Set variable i to the maximum number of bytes of input to tokenize. */
|
|
for(i=0; i<n; i++){
|
|
if( sqlite3_fts3_enable_parentheses && (z[i]=='(' || z[i]==')') ) break;
|
|
if( z[i]=='"' ) break;
|
|
}
|
|
|
|
*pnConsumed = i;
|
|
rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor);
|
|
if( rc==SQLITE_OK ){
|
|
const char *zToken;
|
|
int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0;
|
|
sqlite3_int64 nByte; /* total space to allocate */
|
|
|
|
rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
|
|
if( rc==SQLITE_OK ){
|
|
nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
|
|
pRet = (Fts3Expr *)sqlite3Fts3MallocZero(nByte);
|
|
if( !pRet ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
pRet->eType = FTSQUERY_PHRASE;
|
|
pRet->pPhrase = (Fts3Phrase *)&pRet[1];
|
|
pRet->pPhrase->nToken = 1;
|
|
pRet->pPhrase->iColumn = iCol;
|
|
pRet->pPhrase->aToken[0].n = nToken;
|
|
pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
|
|
memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);
|
|
|
|
if( iEnd<n && z[iEnd]=='*' ){
|
|
pRet->pPhrase->aToken[0].isPrefix = 1;
|
|
iEnd++;
|
|
}
|
|
|
|
while( 1 ){
|
|
if( !sqlite3_fts3_enable_parentheses
|
|
&& iStart>0 && z[iStart-1]=='-'
|
|
){
|
|
pParse->isNot = 1;
|
|
iStart--;
|
|
}else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
|
|
pRet->pPhrase->aToken[0].bFirst = 1;
|
|
iStart--;
|
|
}else{
|
|
break;
|
|
}
|
|
}
|
|
|
|
}
|
|
*pnConsumed = iEnd;
|
|
}else if( i && rc==SQLITE_DONE ){
|
|
rc = SQLITE_OK;
|
|
}
|
|
|
|
pModule->xClose(pCursor);
|
|
}
|
|
|
|
*ppExpr = pRet;
|
|
return rc;
|
|
}
|
|
|
|
|
|
/*
|
|
** Enlarge a memory allocation. If an out-of-memory allocation occurs,
|
|
** then free the old allocation.
|
|
*/
|
|
static void *fts3ReallocOrFree(void *pOrig, sqlite3_int64 nNew){
|
|
void *pRet = sqlite3_realloc64(pOrig, nNew);
|
|
if( !pRet ){
|
|
sqlite3_free(pOrig);
|
|
}
|
|
return pRet;
|
|
}
|
|
|
|
/*
|
|
** Buffer zInput, length nInput, contains the contents of a quoted string
|
|
** that appeared as part of an fts3 query expression. Neither quote character
|
|
** is included in the buffer. This function attempts to tokenize the entire
|
|
** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE
|
|
** containing the results.
|
|
**
|
|
** If successful, SQLITE_OK is returned and *ppExpr set to point at the
|
|
** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory
|
|
** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set
|
|
** to 0.
|
|
*/
|
|
static int getNextString(
|
|
ParseContext *pParse, /* fts3 query parse context */
|
|
const char *zInput, int nInput, /* Input string */
|
|
Fts3Expr **ppExpr /* OUT: expression */
|
|
){
|
|
sqlite3_tokenizer *pTokenizer = pParse->pTokenizer;
|
|
sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
|
|
int rc;
|
|
Fts3Expr *p = 0;
|
|
sqlite3_tokenizer_cursor *pCursor = 0;
|
|
char *zTemp = 0;
|
|
int nTemp = 0;
|
|
|
|
const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase);
|
|
int nToken = 0;
|
|
|
|
/* The final Fts3Expr data structure, including the Fts3Phrase,
|
|
** Fts3PhraseToken structures token buffers are all stored as a single
|
|
** allocation so that the expression can be freed with a single call to
|
|
** sqlite3_free(). Setting this up requires a two pass approach.
|
|
**
|
|
** The first pass, in the block below, uses a tokenizer cursor to iterate
|
|
** through the tokens in the expression. This pass uses fts3ReallocOrFree()
|
|
** to assemble data in two dynamic buffers:
|
|
**
|
|
** Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase
|
|
** structure, followed by the array of Fts3PhraseToken
|
|
** structures. This pass only populates the Fts3PhraseToken array.
|
|
**
|
|
** Buffer zTemp: Contains copies of all tokens.
|
|
**
|
|
** The second pass, in the block that begins "if( rc==SQLITE_DONE )" below,
|
|
** appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase
|
|
** structures.
|
|
*/
|
|
rc = sqlite3Fts3OpenTokenizer(
|
|
pTokenizer, pParse->iLangid, zInput, nInput, &pCursor);
|
|
if( rc==SQLITE_OK ){
|
|
int ii;
|
|
for(ii=0; rc==SQLITE_OK; ii++){
|
|
const char *zByte;
|
|
int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0;
|
|
rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos);
|
|
if( rc==SQLITE_OK ){
|
|
Fts3PhraseToken *pToken;
|
|
|
|
p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
|
|
if( !p ) goto no_mem;
|
|
|
|
zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
|
|
if( !zTemp ) goto no_mem;
|
|
|
|
assert( nToken==ii );
|
|
pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
|
|
memset(pToken, 0, sizeof(Fts3PhraseToken));
|
|
|
|
memcpy(&zTemp[nTemp], zByte, nByte);
|
|
nTemp += nByte;
|
|
|
|
pToken->n = nByte;
|
|
pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
|
|
pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
|
|
nToken = ii+1;
|
|
}
|
|
}
|
|
|
|
pModule->xClose(pCursor);
|
|
pCursor = 0;
|
|
}
|
|
|
|
if( rc==SQLITE_DONE ){
|
|
int jj;
|
|
char *zBuf = 0;
|
|
|
|
p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
|
|
if( !p ) goto no_mem;
|
|
memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
|
|
p->eType = FTSQUERY_PHRASE;
|
|
p->pPhrase = (Fts3Phrase *)&p[1];
|
|
p->pPhrase->iColumn = pParse->iDefaultCol;
|
|
p->pPhrase->nToken = nToken;
|
|
|
|
zBuf = (char *)&p->pPhrase->aToken[nToken];
|
|
if( zTemp ){
|
|
memcpy(zBuf, zTemp, nTemp);
|
|
sqlite3_free(zTemp);
|
|
}else{
|
|
assert( nTemp==0 );
|
|
}
|
|
|
|
for(jj=0; jj<p->pPhrase->nToken; jj++){
|
|
p->pPhrase->aToken[jj].z = zBuf;
|
|
zBuf += p->pPhrase->aToken[jj].n;
|
|
}
|
|
rc = SQLITE_OK;
|
|
}
|
|
|
|
*ppExpr = p;
|
|
return rc;
|
|
no_mem:
|
|
|
|
if( pCursor ){
|
|
pModule->xClose(pCursor);
|
|
}
|
|
sqlite3_free(zTemp);
|
|
sqlite3_free(p);
|
|
*ppExpr = 0;
|
|
return SQLITE_NOMEM;
|
|
}
|
|
|
|
/*
|
|
** The output variable *ppExpr is populated with an allocated Fts3Expr
|
|
** structure, or set to 0 if the end of the input buffer is reached.
|
|
**
|
|
** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM
|
|
** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered.
|
|
** If SQLITE_ERROR is returned, pContext is populated with an error message.
|
|
*/
|
|
static int getNextNode(
|
|
ParseContext *pParse, /* fts3 query parse context */
|
|
const char *z, int n, /* Input string */
|
|
Fts3Expr **ppExpr, /* OUT: expression */
|
|
int *pnConsumed /* OUT: Number of bytes consumed */
|
|
){
|
|
static const struct Fts3Keyword {
|
|
char *z; /* Keyword text */
|
|
unsigned char n; /* Length of the keyword */
|
|
unsigned char parenOnly; /* Only valid in paren mode */
|
|
unsigned char eType; /* Keyword code */
|
|
} aKeyword[] = {
|
|
{ "OR" , 2, 0, FTSQUERY_OR },
|
|
{ "AND", 3, 1, FTSQUERY_AND },
|
|
{ "NOT", 3, 1, FTSQUERY_NOT },
|
|
{ "NEAR", 4, 0, FTSQUERY_NEAR }
|
|
};
|
|
int ii;
|
|
int iCol;
|
|
int iColLen;
|
|
int rc;
|
|
Fts3Expr *pRet = 0;
|
|
|
|
const char *zInput = z;
|
|
int nInput = n;
|
|
|
|
pParse->isNot = 0;
|
|
|
|
/* Skip over any whitespace before checking for a keyword, an open or
|
|
** close bracket, or a quoted string.
|
|
*/
|
|
while( nInput>0 && fts3isspace(*zInput) ){
|
|
nInput--;
|
|
zInput++;
|
|
}
|
|
if( nInput==0 ){
|
|
return SQLITE_DONE;
|
|
}
|
|
|
|
/* See if we are dealing with a keyword. */
|
|
for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){
|
|
const struct Fts3Keyword *pKey = &aKeyword[ii];
|
|
|
|
if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){
|
|
continue;
|
|
}
|
|
|
|
if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){
|
|
int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM;
|
|
int nKey = pKey->n;
|
|
char cNext;
|
|
|
|
/* If this is a "NEAR" keyword, check for an explicit nearness. */
|
|
if( pKey->eType==FTSQUERY_NEAR ){
|
|
assert( nKey==4 );
|
|
if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){
|
|
nKey += 1+sqlite3Fts3ReadInt(&zInput[nKey+1], &nNear);
|
|
}
|
|
}
|
|
|
|
/* At this point this is probably a keyword. But for that to be true,
|
|
** the next byte must contain either whitespace, an open or close
|
|
** parenthesis, a quote character, or EOF.
|
|
*/
|
|
cNext = zInput[nKey];
|
|
if( fts3isspace(cNext)
|
|
|| cNext=='"' || cNext=='(' || cNext==')' || cNext==0
|
|
){
|
|
pRet = (Fts3Expr *)sqlite3Fts3MallocZero(sizeof(Fts3Expr));
|
|
if( !pRet ){
|
|
return SQLITE_NOMEM;
|
|
}
|
|
pRet->eType = pKey->eType;
|
|
pRet->nNear = nNear;
|
|
*ppExpr = pRet;
|
|
*pnConsumed = (int)((zInput - z) + nKey);
|
|
return SQLITE_OK;
|
|
}
|
|
|
|
/* Turns out that wasn't a keyword after all. This happens if the
|
|
** user has supplied a token such as "ORacle". Continue.
|
|
*/
|
|
}
|
|
}
|
|
|
|
/* See if we are dealing with a quoted phrase. If this is the case, then
|
|
** search for the closing quote and pass the whole string to getNextString()
|
|
** for processing. This is easy to do, as fts3 has no syntax for escaping
|
|
** a quote character embedded in a string.
|
|
*/
|
|
if( *zInput=='"' ){
|
|
for(ii=1; ii<nInput && zInput[ii]!='"'; ii++);
|
|
*pnConsumed = (int)((zInput - z) + ii + 1);
|
|
if( ii==nInput ){
|
|
return SQLITE_ERROR;
|
|
}
|
|
return getNextString(pParse, &zInput[1], ii-1, ppExpr);
|
|
}
|
|
|
|
if( sqlite3_fts3_enable_parentheses ){
|
|
if( *zInput=='(' ){
|
|
int nConsumed = 0;
|
|
pParse->nNest++;
|
|
#if !defined(SQLITE_MAX_EXPR_DEPTH)
|
|
if( pParse->nNest>1000 ) return SQLITE_ERROR;
|
|
#elif SQLITE_MAX_EXPR_DEPTH>0
|
|
if( pParse->nNest>SQLITE_MAX_EXPR_DEPTH ) return SQLITE_ERROR;
|
|
#endif
|
|
rc = fts3ExprParse(pParse, zInput+1, nInput-1, ppExpr, &nConsumed);
|
|
*pnConsumed = (int)(zInput - z) + 1 + nConsumed;
|
|
return rc;
|
|
}else if( *zInput==')' ){
|
|
pParse->nNest--;
|
|
*pnConsumed = (int)((zInput - z) + 1);
|
|
*ppExpr = 0;
|
|
return SQLITE_DONE;
|
|
}
|
|
}
|
|
|
|
/* If control flows to this point, this must be a regular token, or
|
|
** the end of the input. Read a regular token using the sqlite3_tokenizer
|
|
** interface. Before doing so, figure out if there is an explicit
|
|
** column specifier for the token.
|
|
**
|
|
** TODO: Strangely, it is not possible to associate a column specifier
|
|
** with a quoted phrase, only with a single token. Not sure if this was
|
|
** an implementation artifact or an intentional decision when fts3 was
|
|
** first implemented. Whichever it was, this module duplicates the
|
|
** limitation.
|
|
*/
|
|
iCol = pParse->iDefaultCol;
|
|
iColLen = 0;
|
|
for(ii=0; ii<pParse->nCol; ii++){
|
|
const char *zStr = pParse->azCol[ii];
|
|
int nStr = (int)strlen(zStr);
|
|
if( nInput>nStr && zInput[nStr]==':'
|
|
&& sqlite3_strnicmp(zStr, zInput, nStr)==0
|
|
){
|
|
iCol = ii;
|
|
iColLen = (int)((zInput - z) + nStr + 1);
|
|
break;
|
|
}
|
|
}
|
|
rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed);
|
|
*pnConsumed += iColLen;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** The argument is an Fts3Expr structure for a binary operator (any type
|
|
** except an FTSQUERY_PHRASE). Return an integer value representing the
|
|
** precedence of the operator. Lower values have a higher precedence (i.e.
|
|
** group more tightly). For example, in the C language, the == operator
|
|
** groups more tightly than ||, and would therefore have a higher precedence.
|
|
**
|
|
** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS
|
|
** is defined), the order of the operators in precedence from highest to
|
|
** lowest is:
|
|
**
|
|
** NEAR
|
|
** NOT
|
|
** AND (including implicit ANDs)
|
|
** OR
|
|
**
|
|
** Note that when using the old query syntax, the OR operator has a higher
|
|
** precedence than the AND operator.
|
|
*/
|
|
static int opPrecedence(Fts3Expr *p){
|
|
assert( p->eType!=FTSQUERY_PHRASE );
|
|
if( sqlite3_fts3_enable_parentheses ){
|
|
return p->eType;
|
|
}else if( p->eType==FTSQUERY_NEAR ){
|
|
return 1;
|
|
}else if( p->eType==FTSQUERY_OR ){
|
|
return 2;
|
|
}
|
|
assert( p->eType==FTSQUERY_AND );
|
|
return 3;
|
|
}
|
|
|
|
/*
|
|
** Argument ppHead contains a pointer to the current head of a query
|
|
** expression tree being parsed. pPrev is the expression node most recently
|
|
** inserted into the tree. This function adds pNew, which is always a binary
|
|
** operator node, into the expression tree based on the relative precedence
|
|
** of pNew and the existing nodes of the tree. This may result in the head
|
|
** of the tree changing, in which case *ppHead is set to the new root node.
|
|
*/
|
|
static void insertBinaryOperator(
|
|
Fts3Expr **ppHead, /* Pointer to the root node of a tree */
|
|
Fts3Expr *pPrev, /* Node most recently inserted into the tree */
|
|
Fts3Expr *pNew /* New binary node to insert into expression tree */
|
|
){
|
|
Fts3Expr *pSplit = pPrev;
|
|
while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){
|
|
pSplit = pSplit->pParent;
|
|
}
|
|
|
|
if( pSplit->pParent ){
|
|
assert( pSplit->pParent->pRight==pSplit );
|
|
pSplit->pParent->pRight = pNew;
|
|
pNew->pParent = pSplit->pParent;
|
|
}else{
|
|
*ppHead = pNew;
|
|
}
|
|
pNew->pLeft = pSplit;
|
|
pSplit->pParent = pNew;
|
|
}
|
|
|
|
/*
|
|
** Parse the fts3 query expression found in buffer z, length n. This function
|
|
** returns either when the end of the buffer is reached or an unmatched
|
|
** closing bracket - ')' - is encountered.
|
|
**
|
|
** If successful, SQLITE_OK is returned, *ppExpr is set to point to the
|
|
** parsed form of the expression and *pnConsumed is set to the number of
|
|
** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM
|
|
** (out of memory error) or SQLITE_ERROR (parse error) is returned.
|
|
*/
|
|
static int fts3ExprParse(
|
|
ParseContext *pParse, /* fts3 query parse context */
|
|
const char *z, int n, /* Text of MATCH query */
|
|
Fts3Expr **ppExpr, /* OUT: Parsed query structure */
|
|
int *pnConsumed /* OUT: Number of bytes consumed */
|
|
){
|
|
Fts3Expr *pRet = 0;
|
|
Fts3Expr *pPrev = 0;
|
|
Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */
|
|
int nIn = n;
|
|
const char *zIn = z;
|
|
int rc = SQLITE_OK;
|
|
int isRequirePhrase = 1;
|
|
|
|
while( rc==SQLITE_OK ){
|
|
Fts3Expr *p = 0;
|
|
int nByte = 0;
|
|
|
|
rc = getNextNode(pParse, zIn, nIn, &p, &nByte);
|
|
assert( nByte>0 || (rc!=SQLITE_OK && p==0) );
|
|
if( rc==SQLITE_OK ){
|
|
if( p ){
|
|
int isPhrase;
|
|
|
|
if( !sqlite3_fts3_enable_parentheses
|
|
&& p->eType==FTSQUERY_PHRASE && pParse->isNot
|
|
){
|
|
/* Create an implicit NOT operator. */
|
|
Fts3Expr *pNot = sqlite3Fts3MallocZero(sizeof(Fts3Expr));
|
|
if( !pNot ){
|
|
sqlite3Fts3ExprFree(p);
|
|
rc = SQLITE_NOMEM;
|
|
goto exprparse_out;
|
|
}
|
|
pNot->eType = FTSQUERY_NOT;
|
|
pNot->pRight = p;
|
|
p->pParent = pNot;
|
|
if( pNotBranch ){
|
|
pNot->pLeft = pNotBranch;
|
|
pNotBranch->pParent = pNot;
|
|
}
|
|
pNotBranch = pNot;
|
|
p = pPrev;
|
|
}else{
|
|
int eType = p->eType;
|
|
isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft);
|
|
|
|
/* The isRequirePhrase variable is set to true if a phrase or
|
|
** an expression contained in parenthesis is required. If a
|
|
** binary operator (AND, OR, NOT or NEAR) is encounted when
|
|
** isRequirePhrase is set, this is a syntax error.
|
|
*/
|
|
if( !isPhrase && isRequirePhrase ){
|
|
sqlite3Fts3ExprFree(p);
|
|
rc = SQLITE_ERROR;
|
|
goto exprparse_out;
|
|
}
|
|
|
|
if( isPhrase && !isRequirePhrase ){
|
|
/* Insert an implicit AND operator. */
|
|
Fts3Expr *pAnd;
|
|
assert( pRet && pPrev );
|
|
pAnd = sqlite3Fts3MallocZero(sizeof(Fts3Expr));
|
|
if( !pAnd ){
|
|
sqlite3Fts3ExprFree(p);
|
|
rc = SQLITE_NOMEM;
|
|
goto exprparse_out;
|
|
}
|
|
pAnd->eType = FTSQUERY_AND;
|
|
insertBinaryOperator(&pRet, pPrev, pAnd);
|
|
pPrev = pAnd;
|
|
}
|
|
|
|
/* This test catches attempts to make either operand of a NEAR
|
|
** operator something other than a phrase. For example, either of
|
|
** the following:
|
|
**
|
|
** (bracketed expression) NEAR phrase
|
|
** phrase NEAR (bracketed expression)
|
|
**
|
|
** Return an error in either case.
|
|
*/
|
|
if( pPrev && (
|
|
(eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE)
|
|
|| (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR)
|
|
)){
|
|
sqlite3Fts3ExprFree(p);
|
|
rc = SQLITE_ERROR;
|
|
goto exprparse_out;
|
|
}
|
|
|
|
if( isPhrase ){
|
|
if( pRet ){
|
|
assert( pPrev && pPrev->pLeft && pPrev->pRight==0 );
|
|
pPrev->pRight = p;
|
|
p->pParent = pPrev;
|
|
}else{
|
|
pRet = p;
|
|
}
|
|
}else{
|
|
insertBinaryOperator(&pRet, pPrev, p);
|
|
}
|
|
isRequirePhrase = !isPhrase;
|
|
}
|
|
pPrev = p;
|
|
}
|
|
assert( nByte>0 );
|
|
}
|
|
assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) );
|
|
nIn -= nByte;
|
|
zIn += nByte;
|
|
}
|
|
|
|
if( rc==SQLITE_DONE && pRet && isRequirePhrase ){
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
if( rc==SQLITE_DONE ){
|
|
rc = SQLITE_OK;
|
|
if( !sqlite3_fts3_enable_parentheses && pNotBranch ){
|
|
if( !pRet ){
|
|
rc = SQLITE_ERROR;
|
|
}else{
|
|
Fts3Expr *pIter = pNotBranch;
|
|
while( pIter->pLeft ){
|
|
pIter = pIter->pLeft;
|
|
}
|
|
pIter->pLeft = pRet;
|
|
pRet->pParent = pIter;
|
|
pRet = pNotBranch;
|
|
}
|
|
}
|
|
}
|
|
*pnConsumed = n - nIn;
|
|
|
|
exprparse_out:
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3Fts3ExprFree(pRet);
|
|
sqlite3Fts3ExprFree(pNotBranch);
|
|
pRet = 0;
|
|
}
|
|
*ppExpr = pRet;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Return SQLITE_ERROR if the maximum depth of the expression tree passed
|
|
** as the only argument is more than nMaxDepth.
|
|
*/
|
|
static int fts3ExprCheckDepth(Fts3Expr *p, int nMaxDepth){
|
|
int rc = SQLITE_OK;
|
|
if( p ){
|
|
if( nMaxDepth<0 ){
|
|
rc = SQLITE_TOOBIG;
|
|
}else{
|
|
rc = fts3ExprCheckDepth(p->pLeft, nMaxDepth-1);
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3ExprCheckDepth(p->pRight, nMaxDepth-1);
|
|
}
|
|
}
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function attempts to transform the expression tree at (*pp) to
|
|
** an equivalent but more balanced form. The tree is modified in place.
|
|
** If successful, SQLITE_OK is returned and (*pp) set to point to the
|
|
** new root expression node.
|
|
**
|
|
** nMaxDepth is the maximum allowable depth of the balanced sub-tree.
|
|
**
|
|
** Otherwise, if an error occurs, an SQLite error code is returned and
|
|
** expression (*pp) freed.
|
|
*/
|
|
static int fts3ExprBalance(Fts3Expr **pp, int nMaxDepth){
|
|
int rc = SQLITE_OK; /* Return code */
|
|
Fts3Expr *pRoot = *pp; /* Initial root node */
|
|
Fts3Expr *pFree = 0; /* List of free nodes. Linked by pParent. */
|
|
int eType = pRoot->eType; /* Type of node in this tree */
|
|
|
|
if( nMaxDepth==0 ){
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
if( (eType==FTSQUERY_AND || eType==FTSQUERY_OR) ){
|
|
Fts3Expr **apLeaf;
|
|
apLeaf = (Fts3Expr **)sqlite3_malloc64(sizeof(Fts3Expr *) * nMaxDepth);
|
|
if( 0==apLeaf ){
|
|
rc = SQLITE_NOMEM;
|
|
}else{
|
|
memset(apLeaf, 0, sizeof(Fts3Expr *) * nMaxDepth);
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
int i;
|
|
Fts3Expr *p;
|
|
|
|
/* Set $p to point to the left-most leaf in the tree of eType nodes. */
|
|
for(p=pRoot; p->eType==eType; p=p->pLeft){
|
|
assert( p->pParent==0 || p->pParent->pLeft==p );
|
|
assert( p->pLeft && p->pRight );
|
|
}
|
|
|
|
/* This loop runs once for each leaf in the tree of eType nodes. */
|
|
while( 1 ){
|
|
int iLvl;
|
|
Fts3Expr *pParent = p->pParent; /* Current parent of p */
|
|
|
|
assert( pParent==0 || pParent->pLeft==p );
|
|
p->pParent = 0;
|
|
if( pParent ){
|
|
pParent->pLeft = 0;
|
|
}else{
|
|
pRoot = 0;
|
|
}
|
|
rc = fts3ExprBalance(&p, nMaxDepth-1);
|
|
if( rc!=SQLITE_OK ) break;
|
|
|
|
for(iLvl=0; p && iLvl<nMaxDepth; iLvl++){
|
|
if( apLeaf[iLvl]==0 ){
|
|
apLeaf[iLvl] = p;
|
|
p = 0;
|
|
}else{
|
|
assert( pFree );
|
|
pFree->pLeft = apLeaf[iLvl];
|
|
pFree->pRight = p;
|
|
pFree->pLeft->pParent = pFree;
|
|
pFree->pRight->pParent = pFree;
|
|
|
|
p = pFree;
|
|
pFree = pFree->pParent;
|
|
p->pParent = 0;
|
|
apLeaf[iLvl] = 0;
|
|
}
|
|
}
|
|
if( p ){
|
|
sqlite3Fts3ExprFree(p);
|
|
rc = SQLITE_TOOBIG;
|
|
break;
|
|
}
|
|
|
|
/* If that was the last leaf node, break out of the loop */
|
|
if( pParent==0 ) break;
|
|
|
|
/* Set $p to point to the next leaf in the tree of eType nodes */
|
|
for(p=pParent->pRight; p->eType==eType; p=p->pLeft);
|
|
|
|
/* Remove pParent from the original tree. */
|
|
assert( pParent->pParent==0 || pParent->pParent->pLeft==pParent );
|
|
pParent->pRight->pParent = pParent->pParent;
|
|
if( pParent->pParent ){
|
|
pParent->pParent->pLeft = pParent->pRight;
|
|
}else{
|
|
assert( pParent==pRoot );
|
|
pRoot = pParent->pRight;
|
|
}
|
|
|
|
/* Link pParent into the free node list. It will be used as an
|
|
** internal node of the new tree. */
|
|
pParent->pParent = pFree;
|
|
pFree = pParent;
|
|
}
|
|
|
|
if( rc==SQLITE_OK ){
|
|
p = 0;
|
|
for(i=0; i<nMaxDepth; i++){
|
|
if( apLeaf[i] ){
|
|
if( p==0 ){
|
|
p = apLeaf[i];
|
|
p->pParent = 0;
|
|
}else{
|
|
assert( pFree!=0 );
|
|
pFree->pRight = p;
|
|
pFree->pLeft = apLeaf[i];
|
|
pFree->pLeft->pParent = pFree;
|
|
pFree->pRight->pParent = pFree;
|
|
|
|
p = pFree;
|
|
pFree = pFree->pParent;
|
|
p->pParent = 0;
|
|
}
|
|
}
|
|
}
|
|
pRoot = p;
|
|
}else{
|
|
/* An error occurred. Delete the contents of the apLeaf[] array
|
|
** and pFree list. Everything else is cleaned up by the call to
|
|
** sqlite3Fts3ExprFree(pRoot) below. */
|
|
Fts3Expr *pDel;
|
|
for(i=0; i<nMaxDepth; i++){
|
|
sqlite3Fts3ExprFree(apLeaf[i]);
|
|
}
|
|
while( (pDel=pFree)!=0 ){
|
|
pFree = pDel->pParent;
|
|
sqlite3_free(pDel);
|
|
}
|
|
}
|
|
|
|
assert( pFree==0 );
|
|
sqlite3_free( apLeaf );
|
|
}
|
|
}else if( eType==FTSQUERY_NOT ){
|
|
Fts3Expr *pLeft = pRoot->pLeft;
|
|
Fts3Expr *pRight = pRoot->pRight;
|
|
|
|
pRoot->pLeft = 0;
|
|
pRoot->pRight = 0;
|
|
pLeft->pParent = 0;
|
|
pRight->pParent = 0;
|
|
|
|
rc = fts3ExprBalance(&pLeft, nMaxDepth-1);
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3ExprBalance(&pRight, nMaxDepth-1);
|
|
}
|
|
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3Fts3ExprFree(pRight);
|
|
sqlite3Fts3ExprFree(pLeft);
|
|
}else{
|
|
assert( pLeft && pRight );
|
|
pRoot->pLeft = pLeft;
|
|
pLeft->pParent = pRoot;
|
|
pRoot->pRight = pRight;
|
|
pRight->pParent = pRoot;
|
|
}
|
|
}
|
|
}
|
|
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3Fts3ExprFree(pRoot);
|
|
pRoot = 0;
|
|
}
|
|
*pp = pRoot;
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** This function is similar to sqlite3Fts3ExprParse(), with the following
|
|
** differences:
|
|
**
|
|
** 1. It does not do expression rebalancing.
|
|
** 2. It does not check that the expression does not exceed the
|
|
** maximum allowable depth.
|
|
** 3. Even if it fails, *ppExpr may still be set to point to an
|
|
** expression tree. It should be deleted using sqlite3Fts3ExprFree()
|
|
** in this case.
|
|
*/
|
|
static int fts3ExprParseUnbalanced(
|
|
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
|
|
int iLangid, /* Language id for tokenizer */
|
|
char **azCol, /* Array of column names for fts3 table */
|
|
int bFts4, /* True to allow FTS4-only syntax */
|
|
int nCol, /* Number of entries in azCol[] */
|
|
int iDefaultCol, /* Default column to query */
|
|
const char *z, int n, /* Text of MATCH query */
|
|
Fts3Expr **ppExpr /* OUT: Parsed query structure */
|
|
){
|
|
int nParsed;
|
|
int rc;
|
|
ParseContext sParse;
|
|
|
|
memset(&sParse, 0, sizeof(ParseContext));
|
|
sParse.pTokenizer = pTokenizer;
|
|
sParse.iLangid = iLangid;
|
|
sParse.azCol = (const char **)azCol;
|
|
sParse.nCol = nCol;
|
|
sParse.iDefaultCol = iDefaultCol;
|
|
sParse.bFts4 = bFts4;
|
|
if( z==0 ){
|
|
*ppExpr = 0;
|
|
return SQLITE_OK;
|
|
}
|
|
if( n<0 ){
|
|
n = (int)strlen(z);
|
|
}
|
|
rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed);
|
|
assert( rc==SQLITE_OK || *ppExpr==0 );
|
|
|
|
/* Check for mismatched parenthesis */
|
|
if( rc==SQLITE_OK && sParse.nNest ){
|
|
rc = SQLITE_ERROR;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Parameters z and n contain a pointer to and length of a buffer containing
|
|
** an fts3 query expression, respectively. This function attempts to parse the
|
|
** query expression and create a tree of Fts3Expr structures representing the
|
|
** parsed expression. If successful, *ppExpr is set to point to the head
|
|
** of the parsed expression tree and SQLITE_OK is returned. If an error
|
|
** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse
|
|
** error) is returned and *ppExpr is set to 0.
|
|
**
|
|
** If parameter n is a negative number, then z is assumed to point to a
|
|
** nul-terminated string and the length is determined using strlen().
|
|
**
|
|
** The first parameter, pTokenizer, is passed the fts3 tokenizer module to
|
|
** use to normalize query tokens while parsing the expression. The azCol[]
|
|
** array, which is assumed to contain nCol entries, should contain the names
|
|
** of each column in the target fts3 table, in order from left to right.
|
|
** Column names must be nul-terminated strings.
|
|
**
|
|
** The iDefaultCol parameter should be passed the index of the table column
|
|
** that appears on the left-hand-side of the MATCH operator (the default
|
|
** column to match against for tokens for which a column name is not explicitly
|
|
** specified as part of the query string), or -1 if tokens may by default
|
|
** match any table column.
|
|
*/
|
|
int sqlite3Fts3ExprParse(
|
|
sqlite3_tokenizer *pTokenizer, /* Tokenizer module */
|
|
int iLangid, /* Language id for tokenizer */
|
|
char **azCol, /* Array of column names for fts3 table */
|
|
int bFts4, /* True to allow FTS4-only syntax */
|
|
int nCol, /* Number of entries in azCol[] */
|
|
int iDefaultCol, /* Default column to query */
|
|
const char *z, int n, /* Text of MATCH query */
|
|
Fts3Expr **ppExpr, /* OUT: Parsed query structure */
|
|
char **pzErr /* OUT: Error message (sqlite3_malloc) */
|
|
){
|
|
int rc = fts3ExprParseUnbalanced(
|
|
pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr
|
|
);
|
|
|
|
/* Rebalance the expression. And check that its depth does not exceed
|
|
** SQLITE_FTS3_MAX_EXPR_DEPTH. */
|
|
if( rc==SQLITE_OK && *ppExpr ){
|
|
rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
|
|
if( rc==SQLITE_OK ){
|
|
rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH);
|
|
}
|
|
}
|
|
|
|
if( rc!=SQLITE_OK ){
|
|
sqlite3Fts3ExprFree(*ppExpr);
|
|
*ppExpr = 0;
|
|
if( rc==SQLITE_TOOBIG ){
|
|
sqlite3Fts3ErrMsg(pzErr,
|
|
"FTS expression tree is too large (maximum depth %d)",
|
|
SQLITE_FTS3_MAX_EXPR_DEPTH
|
|
);
|
|
rc = SQLITE_ERROR;
|
|
}else if( rc==SQLITE_ERROR ){
|
|
sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z);
|
|
}
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/*
|
|
** Free a single node of an expression tree.
|
|
*/
|
|
static void fts3FreeExprNode(Fts3Expr *p){
|
|
assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 );
|
|
sqlite3Fts3EvalPhraseCleanup(p->pPhrase);
|
|
sqlite3_free(p->aMI);
|
|
sqlite3_free(p);
|
|
}
|
|
|
|
/*
|
|
** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse().
|
|
**
|
|
** This function would be simpler if it recursively called itself. But
|
|
** that would mean passing a sufficiently large expression to ExprParse()
|
|
** could cause a stack overflow.
|
|
*/
|
|
void sqlite3Fts3ExprFree(Fts3Expr *pDel){
|
|
Fts3Expr *p;
|
|
assert( pDel==0 || pDel->pParent==0 );
|
|
for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){
|
|
assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft );
|
|
}
|
|
while( p ){
|
|
Fts3Expr *pParent = p->pParent;
|
|
fts3FreeExprNode(p);
|
|
if( pParent && p==pParent->pLeft && pParent->pRight ){
|
|
p = pParent->pRight;
|
|
while( p && (p->pLeft || p->pRight) ){
|
|
assert( p==p->pParent->pRight || p==p->pParent->pLeft );
|
|
p = (p->pLeft ? p->pLeft : p->pRight);
|
|
}
|
|
}else{
|
|
p = pParent;
|
|
}
|
|
}
|
|
}
|
|
|
|
/****************************************************************************
|
|
*****************************************************************************
|
|
** Everything after this point is just test code.
|
|
*/
|
|
|
|
#ifdef SQLITE_TEST
|
|
|
|
#include <stdio.h>
|
|
|
|
/*
|
|
** Return a pointer to a buffer containing a text representation of the
|
|
** expression passed as the first argument. The buffer is obtained from
|
|
** sqlite3_malloc(). It is the responsibility of the caller to use
|
|
** sqlite3_free() to release the memory. If an OOM condition is encountered,
|
|
** NULL is returned.
|
|
**
|
|
** If the second argument is not NULL, then its contents are prepended to
|
|
** the returned expression text and then freed using sqlite3_free().
|
|
*/
|
|
static char *exprToString(Fts3Expr *pExpr, char *zBuf){
|
|
if( pExpr==0 ){
|
|
return sqlite3_mprintf("");
|
|
}
|
|
switch( pExpr->eType ){
|
|
case FTSQUERY_PHRASE: {
|
|
Fts3Phrase *pPhrase = pExpr->pPhrase;
|
|
int i;
|
|
zBuf = sqlite3_mprintf(
|
|
"%zPHRASE %d 0", zBuf, pPhrase->iColumn);
|
|
for(i=0; zBuf && i<pPhrase->nToken; i++){
|
|
zBuf = sqlite3_mprintf("%z %.*s%s", zBuf,
|
|
pPhrase->aToken[i].n, pPhrase->aToken[i].z,
|
|
(pPhrase->aToken[i].isPrefix?"+":"")
|
|
);
|
|
}
|
|
return zBuf;
|
|
}
|
|
|
|
case FTSQUERY_NEAR:
|
|
zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear);
|
|
break;
|
|
case FTSQUERY_NOT:
|
|
zBuf = sqlite3_mprintf("%zNOT ", zBuf);
|
|
break;
|
|
case FTSQUERY_AND:
|
|
zBuf = sqlite3_mprintf("%zAND ", zBuf);
|
|
break;
|
|
case FTSQUERY_OR:
|
|
zBuf = sqlite3_mprintf("%zOR ", zBuf);
|
|
break;
|
|
}
|
|
|
|
if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf);
|
|
if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf);
|
|
if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf);
|
|
|
|
if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf);
|
|
if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf);
|
|
|
|
return zBuf;
|
|
}
|
|
|
|
/*
|
|
** This is the implementation of a scalar SQL function used to test the
|
|
** expression parser. It should be called as follows:
|
|
**
|
|
** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...);
|
|
**
|
|
** The first argument, <tokenizer>, is the name of the fts3 tokenizer used
|
|
** to parse the query expression (see README.tokenizers). The second argument
|
|
** is the query expression to parse. Each subsequent argument is the name
|
|
** of a column of the fts3 table that the query expression may refer to.
|
|
** For example:
|
|
**
|
|
** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2');
|
|
*/
|
|
static void fts3ExprTestCommon(
|
|
int bRebalance,
|
|
sqlite3_context *context,
|
|
int argc,
|
|
sqlite3_value **argv
|
|
){
|
|
sqlite3_tokenizer *pTokenizer = 0;
|
|
int rc;
|
|
char **azCol = 0;
|
|
const char *zExpr;
|
|
int nExpr;
|
|
int nCol;
|
|
int ii;
|
|
Fts3Expr *pExpr;
|
|
char *zBuf = 0;
|
|
Fts3Hash *pHash = (Fts3Hash*)sqlite3_user_data(context);
|
|
const char *zTokenizer = 0;
|
|
char *zErr = 0;
|
|
|
|
if( argc<3 ){
|
|
sqlite3_result_error(context,
|
|
"Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
|
|
);
|
|
return;
|
|
}
|
|
|
|
zTokenizer = (const char*)sqlite3_value_text(argv[0]);
|
|
rc = sqlite3Fts3InitTokenizer(pHash, zTokenizer, &pTokenizer, &zErr);
|
|
if( rc!=SQLITE_OK ){
|
|
if( rc==SQLITE_NOMEM ){
|
|
sqlite3_result_error_nomem(context);
|
|
}else{
|
|
sqlite3_result_error(context, zErr, -1);
|
|
}
|
|
sqlite3_free(zErr);
|
|
return;
|
|
}
|
|
|
|
zExpr = (const char *)sqlite3_value_text(argv[1]);
|
|
nExpr = sqlite3_value_bytes(argv[1]);
|
|
nCol = argc-2;
|
|
azCol = (char **)sqlite3_malloc64(nCol*sizeof(char *));
|
|
if( !azCol ){
|
|
sqlite3_result_error_nomem(context);
|
|
goto exprtest_out;
|
|
}
|
|
for(ii=0; ii<nCol; ii++){
|
|
azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
|
|
}
|
|
|
|
if( bRebalance ){
|
|
char *zDummy = 0;
|
|
rc = sqlite3Fts3ExprParse(
|
|
pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
|
|
);
|
|
assert( rc==SQLITE_OK || pExpr==0 );
|
|
sqlite3_free(zDummy);
|
|
}else{
|
|
rc = fts3ExprParseUnbalanced(
|
|
pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
|
|
);
|
|
}
|
|
|
|
if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
|
|
sqlite3Fts3ExprFree(pExpr);
|
|
sqlite3_result_error(context, "Error parsing expression", -1);
|
|
}else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
|
|
sqlite3_result_error_nomem(context);
|
|
}else{
|
|
sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
|
|
sqlite3_free(zBuf);
|
|
}
|
|
|
|
sqlite3Fts3ExprFree(pExpr);
|
|
|
|
exprtest_out:
|
|
if( pTokenizer ){
|
|
rc = pTokenizer->pModule->xDestroy(pTokenizer);
|
|
}
|
|
sqlite3_free(azCol);
|
|
}
|
|
|
|
static void fts3ExprTest(
|
|
sqlite3_context *context,
|
|
int argc,
|
|
sqlite3_value **argv
|
|
){
|
|
fts3ExprTestCommon(0, context, argc, argv);
|
|
}
|
|
static void fts3ExprTestRebalance(
|
|
sqlite3_context *context,
|
|
int argc,
|
|
sqlite3_value **argv
|
|
){
|
|
fts3ExprTestCommon(1, context, argc, argv);
|
|
}
|
|
|
|
/*
|
|
** Register the query expression parser test function fts3_exprtest()
|
|
** with database connection db.
|
|
*/
|
|
int sqlite3Fts3ExprInitTestInterface(sqlite3 *db, Fts3Hash *pHash){
|
|
int rc = sqlite3_create_function(
|
|
db, "fts3_exprtest", -1, SQLITE_UTF8, (void*)pHash, fts3ExprTest, 0, 0
|
|
);
|
|
if( rc==SQLITE_OK ){
|
|
rc = sqlite3_create_function(db, "fts3_exprtest_rebalance",
|
|
-1, SQLITE_UTF8, (void*)pHash, fts3ExprTestRebalance, 0, 0
|
|
);
|
|
}
|
|
return rc;
|
|
}
|
|
|
|
#endif
|
|
#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */
|